| Directory: | ./ |
|---|---|
| File: | strings/ctype-uca.cc |
| Date: | 2022-12-06 21:40:42 |
| Exec | Total | Coverage | |
|---|---|---|---|
| Lines: | 1622 | 1782 | 91.0% |
| Branches: | 2616 | 9044 | 28.9% |
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* Copyright (c) 2004, 2022, Oracle and/or its affiliates. | ||
| 2 | |||
| 3 | This library is free software; you can redistribute it and/or | ||
| 4 | modify it under the terms of the GNU Library General Public | ||
| 5 | License, version 2.0, as published by the Free Software Foundation. | ||
| 6 | |||
| 7 | This library is also distributed with certain software (including | ||
| 8 | but not limited to OpenSSL) that is licensed under separate terms, | ||
| 9 | as designated in a particular file or component or in included license | ||
| 10 | documentation. The authors of MySQL hereby grant you an additional | ||
| 11 | permission to link the library and your derivative works with the | ||
| 12 | separately licensed software that they have included with MySQL. | ||
| 13 | |||
| 14 | Without limiting anything contained in the foregoing, this file, | ||
| 15 | which is part of C Driver for MySQL (Connector/C), is also subject to the | ||
| 16 | Universal FOSS Exception, version 1.0, a copy of which can be found at | ||
| 17 | http://oss.oracle.com/licenses/universal-foss-exception. | ||
| 18 | |||
| 19 | This library is distributed in the hope that it will be useful, | ||
| 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 22 | Library General Public License, version 2.0, for more details. | ||
| 23 | |||
| 24 | You should have received a copy of the GNU Library General Public | ||
| 25 | License along with this library; if not, write to the Free | ||
| 26 | Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, | ||
| 27 | MA 02110-1301 USA */ | ||
| 28 | |||
| 29 | /* | ||
| 30 | UCA (Unicode Collation Algorithm) support. | ||
| 31 | |||
| 32 | Features that are not implemented yet: | ||
| 33 | - No Normalization Form D is done | ||
| 34 | + No decomposition is done | ||
| 35 | + No Thai/Lao ordering is done | ||
| 36 | - No combining marks processing is done | ||
| 37 | */ | ||
| 38 | |||
| 39 | #include <assert.h> | ||
| 40 | #include <stdio.h> | ||
| 41 | #include <string.h> | ||
| 42 | #include <sys/types.h> | ||
| 43 | #include <algorithm> | ||
| 44 | #include <bitset> | ||
| 45 | #include <iterator> | ||
| 46 | #include <map> | ||
| 47 | #include <utility> | ||
| 48 | |||
| 49 | #include "m_ctype.h" | ||
| 50 | #include "m_string.h" | ||
| 51 | #include "my_byteorder.h" | ||
| 52 | #include "my_compiler.h" | ||
| 53 | |||
| 54 | #include "my_inttypes.h" | ||
| 55 | #include "my_loglevel.h" | ||
| 56 | #include "my_macros.h" | ||
| 57 | #include "mysys_err.h" | ||
| 58 | #include "strings/mb_wc.h" | ||
| 59 | #include "strings/str_uca_type.h" | ||
| 60 | #include "strings/uca900_data.h" | ||
| 61 | #include "strings/uca900_ja_data.h" | ||
| 62 | #include "strings/uca900_zh_data.h" | ||
| 63 | #include "strings/uca_data.h" | ||
| 64 | #include "template_utils.h" | ||
| 65 | |||
| 66 | MY_UCA_INFO my_uca_v400 = { | ||
| 67 | UCA_V400, | ||
| 68 | |||
| 69 | 0xFFFF, /* maxchar */ | ||
| 70 | uca_length, uca_weight, false, nullptr, /* contractions */ | ||
| 71 | nullptr, | ||
| 72 | |||
| 73 | /* Logical positions */ | ||
| 74 | 0x0009, /* first_non_ignorable p != ignore */ | ||
| 75 | 0xA48C, /* last_non_ignorable Not a CJK and not UNASSIGNED */ | ||
| 76 | |||
| 77 | 0x0332, /* first_primary_ignorable p == 0 */ | ||
| 78 | 0x20EA, /* last_primary_ignorable */ | ||
| 79 | |||
| 80 | 0x0000, /* first_secondary_ignorable p,s == 0 */ | ||
| 81 | 0xFE73, /* last_secondary_ignorable p,s == 0 */ | ||
| 82 | |||
| 83 | 0x0000, /* first_tertiary_ignorable p,s,t == 0 */ | ||
| 84 | 0xFE73, /* last_tertiary_ignorable p,s,t == 0 */ | ||
| 85 | |||
| 86 | 0x0000, /* first_trailing */ | ||
| 87 | 0x0000, /* last_trailing */ | ||
| 88 | |||
| 89 | 0x0009, /* first_variable */ | ||
| 90 | 0x2183, /* last_variable */ | ||
| 91 | 0, /* extra_ce_pri_base, not used */ | ||
| 92 | 0, /* extra_ce_sec_base, not used */ | ||
| 93 | 0 /* extra_ce_ter_base, not used */ | ||
| 94 | }; | ||
| 95 | |||
| 96 | /******************************************************/ | ||
| 97 | |||
| 98 | MY_UCA_INFO my_uca_v520 = { | ||
| 99 | UCA_V520, | ||
| 100 | |||
| 101 | 0x10FFFF, /* maxchar */ | ||
| 102 | uca520_length, | ||
| 103 | uca520_weight, | ||
| 104 | false, | ||
| 105 | nullptr, /* contractions */ | ||
| 106 | nullptr, | ||
| 107 | |||
| 108 | 0x0009, /* first_non_ignorable p != ignore */ | ||
| 109 | 0x1342E, /* last_non_ignorable Not a CJK and not UNASSIGNED */ | ||
| 110 | |||
| 111 | 0x0332, /* first_primary_ignorable p == ignore */ | ||
| 112 | 0x101FD, /* last_primary_ignorable */ | ||
| 113 | |||
| 114 | 0x0000, /* first_secondary_ignorable p,s= ignore */ | ||
| 115 | 0xFE73, /* last_secondary_ignorable */ | ||
| 116 | |||
| 117 | 0x0000, /* first_tertiary_ignorable p,s,t == ignore */ | ||
| 118 | 0xFE73, /* last_tertiary_ignorable */ | ||
| 119 | |||
| 120 | 0x0000, /* first_trailing */ | ||
| 121 | 0x0000, /* last_trailing */ | ||
| 122 | |||
| 123 | 0x0009, /* first_variable if alt=non-ignorable: p != ignore */ | ||
| 124 | 0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */ | ||
| 125 | 0, /* extra_ce_pri_base, not used */ | ||
| 126 | 0, /* extra_ce_sec_base, not used */ | ||
| 127 | 0 /* extra_ce_ter_base, not used */ | ||
| 128 | }; | ||
| 129 | |||
| 130 | /******************************************************/ | ||
| 131 | |||
| 132 | /* | ||
| 133 | German Phonebook | ||
| 134 | */ | ||
| 135 | static const char german2[] = | ||
| 136 | "&AE << \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " | ||
| 137 | "&OE << \\u0153 <<< \\u0152 << \\u00F6 <<< \\u00D6 " | ||
| 138 | "&UE << \\u00FC <<< \\u00DC "; | ||
| 139 | |||
| 140 | /* | ||
| 141 | Some sources treat LETTER A WITH DIAERESIS (00E4,00C4) | ||
| 142 | secondary greater than LETTER AE (00E6,00C6). | ||
| 143 | http://www.evertype.com/alphabets/icelandic.pdf | ||
| 144 | http://developer.mimer.com/collations/charts/icelandic.htm | ||
| 145 | |||
| 146 | Other sources do not provide any special rules | ||
| 147 | for LETTER A WITH DIAERESIS: | ||
| 148 | http://www.omniglot.com/writing/icelandic.htm | ||
| 149 | http://en.wikipedia.org/wiki/Icelandic_alphabet | ||
| 150 | http://oss.software.ibm.com/icu/charts/collation/is.html | ||
| 151 | |||
| 152 | Let's go the first way. | ||
| 153 | */ | ||
| 154 | |||
| 155 | static const char icelandic[] = | ||
| 156 | "& A < \\u00E1 <<< \\u00C1 " | ||
| 157 | "& D < \\u00F0 <<< \\u00D0 " | ||
| 158 | "& E < \\u00E9 <<< \\u00C9 " | ||
| 159 | "& I < \\u00ED <<< \\u00CD " | ||
| 160 | "& O < \\u00F3 <<< \\u00D3 " | ||
| 161 | "& U < \\u00FA <<< \\u00DA " | ||
| 162 | "& Y < \\u00FD <<< \\u00DD " | ||
| 163 | "& Z < \\u00FE <<< \\u00DE " | ||
| 164 | "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " | ||
| 165 | "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " | ||
| 166 | "< \\u00E5 <<< \\u00C5 "; | ||
| 167 | |||
| 168 | /* | ||
| 169 | Some sources treat I and Y as different on the primary level. | ||
| 170 | Other sources treat I and Y as the same on the primary level. | ||
| 171 | We'll go the first way. | ||
| 172 | */ | ||
| 173 | |||
| 174 | static const char latvian[] = | ||
| 175 | "& C < \\u010D <<< \\u010C " | ||
| 176 | "& G < \\u0123 <<< \\u0122 " | ||
| 177 | "& I < \\u0079 <<< \\u0059 " | ||
| 178 | "& K < \\u0137 <<< \\u0136 " | ||
| 179 | "& L < \\u013C <<< \\u013B " | ||
| 180 | "& N < \\u0146 <<< \\u0145 " | ||
| 181 | "& R < \\u0157 <<< \\u0156 " | ||
| 182 | "& S < \\u0161 <<< \\u0160 " | ||
| 183 | "& Z < \\u017E <<< \\u017D "; | ||
| 184 | |||
| 185 | static const char romanian[] = | ||
| 186 | "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 " | ||
| 187 | "& I < \\u00EE <<< \\u00CE " | ||
| 188 | "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E " | ||
| 189 | "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 "; | ||
| 190 | |||
| 191 | static const char slovenian[] = | ||
| 192 | "& C < \\u010D <<< \\u010C " | ||
| 193 | "& S < \\u0161 <<< \\u0160 " | ||
| 194 | "& Z < \\u017E <<< \\u017D "; | ||
| 195 | |||
| 196 | static const char polish[] = | ||
| 197 | "& A < \\u0105 <<< \\u0104 " | ||
| 198 | "& C < \\u0107 <<< \\u0106 " | ||
| 199 | "& E < \\u0119 <<< \\u0118 " | ||
| 200 | "& L < \\u0142 <<< \\u0141 " | ||
| 201 | "& N < \\u0144 <<< \\u0143 " | ||
| 202 | "& O < \\u00F3 <<< \\u00D3 " | ||
| 203 | "& S < \\u015B <<< \\u015A " | ||
| 204 | "& Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B"; | ||
| 205 | |||
| 206 | static const char estonian[] = | ||
| 207 | "& S < \\u0161 <<< \\u0160 " | ||
| 208 | " < \\u007A <<< \\u005A " | ||
| 209 | " < \\u017E <<< \\u017D " | ||
| 210 | "& W < \\u00F5 <<< \\u00D5 " | ||
| 211 | "< \\u00E4 <<< \\u00C4 " | ||
| 212 | "< \\u00F6 <<< \\u00D6 " | ||
| 213 | "< \\u00FC <<< \\u00DC "; | ||
| 214 | |||
| 215 | // Standard Spanish, also for Galician. | ||
| 216 | static const char spanish[] = "& N < \\u00F1 <<< \\u00D1 "; | ||
| 217 | |||
| 218 | /* | ||
| 219 | Some sources treat V and W as similar on primary level. | ||
| 220 | We'll treat V and W as different on primary level. | ||
| 221 | */ | ||
| 222 | |||
| 223 | static const char swedish[] = | ||
| 224 | "& Y <<\\u00FC <<< \\u00DC " | ||
| 225 | "& Z < \\u00E5 <<< \\u00C5 " | ||
| 226 | "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 " | ||
| 227 | "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 "; | ||
| 228 | |||
| 229 | static const char turkish[] = | ||
| 230 | "& C < \\u00E7 <<< \\u00C7 " | ||
| 231 | "& G < \\u011F <<< \\u011E " | ||
| 232 | "& H < \\u0131 <<< \\u0049 " | ||
| 233 | "& O < \\u00F6 <<< \\u00D6 " | ||
| 234 | "& S < \\u015F <<< \\u015E " | ||
| 235 | "& U < \\u00FC <<< \\u00DC "; | ||
| 236 | |||
| 237 | static const char czech[] = | ||
| 238 | "& C < \\u010D <<< \\u010C " | ||
| 239 | "& H < ch <<< Ch <<< CH" | ||
| 240 | "& R < \\u0159 <<< \\u0158" | ||
| 241 | "& S < \\u0161 <<< \\u0160" | ||
| 242 | "& Z < \\u017E <<< \\u017D"; | ||
| 243 | |||
| 244 | static const char danish[] = /* Also good for Norwegian */ | ||
| 245 | "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170" | ||
| 246 | "& Z < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4" | ||
| 247 | " < \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150" | ||
| 248 | " < \\u00E5 <<< \\u00C5 << aa <<< Aa <<< AA"; | ||
| 249 | |||
| 250 | static const char lithuanian[] = | ||
| 251 | "& C << ch <<< Ch <<< CH< \\u010D <<< \\u010C" | ||
| 252 | "& E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116" | ||
| 253 | "& I << y <<< Y" | ||
| 254 | "& S < \\u0161 <<< \\u0160" | ||
| 255 | "& Z < \\u017E <<< \\u017D"; | ||
| 256 | |||
| 257 | static const char slovak[] = | ||
| 258 | "& A < \\u00E4 <<< \\u00C4" | ||
| 259 | "& C < \\u010D <<< \\u010C" | ||
| 260 | "& H < ch <<< Ch <<< CH" | ||
| 261 | "& O < \\u00F4 <<< \\u00D4" | ||
| 262 | "& S < \\u0161 <<< \\u0160" | ||
| 263 | "& Z < \\u017E <<< \\u017D"; | ||
| 264 | |||
| 265 | static const char spanish2[] = /* Also good for Asturian and Galician */ | ||
| 266 | "&C < ch <<< Ch <<< CH" | ||
| 267 | "&L < ll <<< Ll <<< LL" | ||
| 268 | "&N < \\u00F1 <<< \\u00D1"; | ||
| 269 | |||
| 270 | static const char roman[] = /* i.e. Classical Latin */ | ||
| 271 | "& I << j <<< J " | ||
| 272 | "& V << u <<< U "; | ||
| 273 | |||
| 274 | /* | ||
| 275 | Persian collation support was provided by | ||
| 276 | Jody McIntyre <mysql@modernduck.com> | ||
| 277 | |||
| 278 | To: internals@lists.mysql.com | ||
| 279 | Subject: Persian UTF8 collation support | ||
| 280 | Date: 17.08.2004 | ||
| 281 | |||
| 282 | Contraction is not implemented. Some implementations do perform | ||
| 283 | contraction but others do not, and it is able to sort all my test | ||
| 284 | strings correctly. | ||
| 285 | |||
| 286 | Jody. | ||
| 287 | */ | ||
| 288 | static const char persian[] = | ||
| 289 | "& \\u066D < \\u064E < \\uFE76 < \\uFE77 < \\u0650 < \\uFE7A < \\uFE7B" | ||
| 290 | " < \\u064F < \\uFE78 < \\uFE79 < \\u064B < \\uFE70 < \\uFE71" | ||
| 291 | " < \\u064D < \\uFE74 < \\u064C < \\uFE72" | ||
| 292 | "& \\uFE7F < \\u0653 < \\u0654 < \\u0655 < \\u0670" | ||
| 293 | "& \\u0669 < \\u0622 < \\u0627 < \\u0671 < \\u0621 < \\u0623 < \\u0625" | ||
| 294 | " < \\u0624 < \\u0626" | ||
| 295 | "& \\u0642 < \\u06A9 < \\u0643" | ||
| 296 | "& \\u0648 < \\u0647 < \\u0629 < \\u06C0 < \\u06CC < \\u0649 < \\u064A" | ||
| 297 | "& \\uFE80 < \\uFE81 < \\uFE82 < \\uFE8D < \\uFE8E < \\uFB50 < \\uFB51" | ||
| 298 | " < \\uFE80 " | ||
| 299 | /* | ||
| 300 | FE80 appears both in reset and shift. | ||
| 301 | We need to break the rule here and reset to *new* FE80 again, | ||
| 302 | so weight for FE83 is calculated as P[FE80]+1, not as P[FE80]+8. | ||
| 303 | */ | ||
| 304 | " & \\uFE80 < \\uFE83 < \\uFE84 < \\uFE87 < \\uFE88 < \\uFE85" | ||
| 305 | " < \\uFE86 < \\u0689 < \\u068A" | ||
| 306 | "& \\uFEAE < \\uFDFC" | ||
| 307 | "& \\uFED8 < \\uFB8E < \\uFB8F < \\uFB90 < \\uFB91 < \\uFED9 < \\uFEDA" | ||
| 308 | " < \\uFEDB < \\uFEDC" | ||
| 309 | "& \\uFEEE < \\uFEE9 < \\uFEEA < \\uFEEB < \\uFEEC < \\uFE93 < \\uFE94" | ||
| 310 | " < \\uFBA4 < \\uFBA5 < \\uFBFC < \\uFBFD < \\uFBFE < \\uFBFF" | ||
| 311 | " < \\uFEEF < \\uFEF0 < \\uFEF1 < \\uFEF2 < \\uFEF3 < \\uFEF4" | ||
| 312 | " < \\uFEF5 < \\uFEF6 < \\uFEF7 < \\uFEF8 < \\uFEF9 < \\uFEFA" | ||
| 313 | " < \\uFEFB < \\uFEFC"; | ||
| 314 | |||
| 315 | /* | ||
| 316 | Esperanto tailoring. | ||
| 317 | Contributed by Bertilo Wennergren <bertilow at gmail dot com> | ||
| 318 | September 1, 2005 | ||
| 319 | */ | ||
| 320 | static const char esperanto[] = | ||
| 321 | "& C < \\u0109 <<< \\u0108" | ||
| 322 | "& G < \\u011D <<< \\u011C" | ||
| 323 | "& H < \\u0125 <<< \\u0124" | ||
| 324 | "& J < \\u0135 <<< \\u0134" | ||
| 325 | "& S < \\u015d <<< \\u015c" | ||
| 326 | "& U < \\u016d <<< \\u016c"; | ||
| 327 | |||
| 328 | /* | ||
| 329 | A simplified version of Hungarian, without consonant contractions. | ||
| 330 | */ | ||
| 331 | static const char hungarian[] = | ||
| 332 | "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150" | ||
| 333 | "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170"; | ||
| 334 | |||
| 335 | static const char croatian[] = | ||
| 336 | "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106" | ||
| 337 | "&D < d\\u017E = \\u01C6 <<< d\\u017D <<< D\\u017E = \\u01C5 <<< D\\u017D " | ||
| 338 | "= \\u01C4" | ||
| 339 | " < \\u0111 <<< \\u0110" | ||
| 340 | "&L < lj = \\u01C9 <<< lJ <<< Lj = \\u01C8 <<< LJ = \\u01C7" | ||
| 341 | "&N < nj = \\u01CC <<< nJ <<< Nj = \\u01CB <<< NJ = \\u01CA" | ||
| 342 | "&S < \\u0161 <<< \\u0160" | ||
| 343 | "&Z < \\u017E <<< \\u017D"; | ||
| 344 | |||
| 345 | /* | ||
| 346 | SCCII Part 1 : Collation Sequence (SLS1134) | ||
| 347 | 2006/11/24 | ||
| 348 | Harshula Jayasuriya <harshula at gmail dot com> | ||
| 349 | Language Technology Research Lab, University of Colombo / ICTA | ||
| 350 | */ | ||
| 351 | #if 0 | ||
| 352 | static const char sinhala[]= | ||
| 353 | "& \\u0D96 < \\u0D82 < \\u0D83" | ||
| 354 | "& \\u0DA5 < \\u0DA4" | ||
| 355 | "& \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3" | ||
| 356 | "& \\u0DDE < \\u0DCA"; | ||
| 357 | #else | ||
| 358 | static const char sinhala[] = | ||
| 359 | "& \\u0D96 < \\u0D82 < \\u0D83 < \\u0D9A < \\u0D9B < \\u0D9C < \\u0D9D" | ||
| 360 | "< \\u0D9E < \\u0D9F < \\u0DA0 < \\u0DA1 < \\u0DA2 < \\u0DA3" | ||
| 361 | "< \\u0DA5 < \\u0DA4 < \\u0DA6" | ||
| 362 | "< \\u0DA7 < \\u0DA8 < \\u0DA9 < \\u0DAA < \\u0DAB < \\u0DAC" | ||
| 363 | "< \\u0DAD < \\u0DAE < \\u0DAF < \\u0DB0 < \\u0DB1" | ||
| 364 | "< \\u0DB3 < \\u0DB4 < \\u0DB5 < \\u0DB6 < \\u0DB7 < \\u0DB8" | ||
| 365 | "< \\u0DB9 < \\u0DBA < \\u0DBB < \\u0DBD < \\u0DC0 < \\u0DC1" | ||
| 366 | "< \\u0DC2 < \\u0DC3 < \\u0DC4 < \\u0DC5 < \\u0DC6" | ||
| 367 | "< \\u0DCF" | ||
| 368 | "< \\u0DD0 < \\u0DD1 < \\u0DD2 < \\u0DD3 < \\u0DD4 < \\u0DD6" | ||
| 369 | "< \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3 < \\u0DD9 < \\u0DDA" | ||
| 370 | "< \\u0DDB < \\u0DDC < \\u0DDD < \\u0DDE < \\u0DCA"; | ||
| 371 | #endif | ||
| 372 | |||
| 373 | static const char vietnamese[] = | ||
| 374 | " &A << \\u00E0 <<< \\u00C0" /* A */ | ||
| 375 | " << \\u1EA3 <<< \\u1EA2" | ||
| 376 | " << \\u00E3 <<< \\u00C3" | ||
| 377 | " << \\u00E1 <<< \\u00C1" | ||
| 378 | " << \\u1EA1 <<< \\u1EA0" | ||
| 379 | " < \\u0103 <<< \\u0102" /* A WITH BREVE */ | ||
| 380 | " << \\u1EB1 <<< \\u1EB0" | ||
| 381 | " << \\u1EB3 <<< \\u1EB2" | ||
| 382 | " << \\u1EB5 <<< \\u1EB4" | ||
| 383 | " << \\u1EAF <<< \\u1EAE" | ||
| 384 | " << \\u1EB7 <<< \\u1EB6" | ||
| 385 | " < \\u00E2 <<< \\u00C2" /* A WITH CIRCUMFLEX */ | ||
| 386 | " << \\u1EA7 <<< \\u1EA6" | ||
| 387 | " << \\u1EA9 <<< \\u1EA8" | ||
| 388 | " << \\u1EAB <<< \\u1EAA" | ||
| 389 | " << \\u1EA5 <<< \\u1EA4" | ||
| 390 | " << \\u1EAD <<< \\u1EAC" | ||
| 391 | " &D < \\u0111 <<< \\u0110" /* D WITH STROKE */ | ||
| 392 | " &E << \\u00E8 <<< \\u00C8" /* E */ | ||
| 393 | " << \\u1EBB <<< \\u1EBA" | ||
| 394 | " << \\u1EBD <<< \\u1EBC" | ||
| 395 | " << \\u00E9 <<< \\u00C9" | ||
| 396 | " << \\u1EB9 <<< \\u1EB8" | ||
| 397 | " < \\u00EA <<< \\u00CA" /* E WITH CIRCUMFLEX */ | ||
| 398 | " << \\u1EC1 <<< \\u1EC0" | ||
| 399 | " << \\u1EC3 <<< \\u1EC2" | ||
| 400 | " << \\u1EC5 <<< \\u1EC4" | ||
| 401 | " << \\u1EBF <<< \\u1EBE" | ||
| 402 | " << \\u1EC7 <<< \\u1EC6" | ||
| 403 | " &I << \\u00EC <<< \\u00CC" /* I */ | ||
| 404 | " << \\u1EC9 <<< \\u1EC8" | ||
| 405 | " << \\u0129 <<< \\u0128" | ||
| 406 | " << \\u00ED <<< \\u00CD" | ||
| 407 | " << \\u1ECB <<< \\u1ECA" | ||
| 408 | " &O << \\u00F2 <<< \\u00D2" /* O */ | ||
| 409 | " << \\u1ECF <<< \\u1ECE" | ||
| 410 | " << \\u00F5 <<< \\u00D5" | ||
| 411 | " << \\u00F3 <<< \\u00D3" | ||
| 412 | " << \\u1ECD <<< \\u1ECC" | ||
| 413 | " < \\u00F4 <<< \\u00D4" /* O WITH CIRCUMFLEX */ | ||
| 414 | " << \\u1ED3 <<< \\u1ED2" | ||
| 415 | " << \\u1ED5 <<< \\u1ED4" | ||
| 416 | " << \\u1ED7 <<< \\u1ED6" | ||
| 417 | " << \\u1ED1 <<< \\u1ED0" | ||
| 418 | " << \\u1ED9 <<< \\u1ED8" | ||
| 419 | " < \\u01A1 <<< \\u01A0" /* O WITH HORN */ | ||
| 420 | " << \\u1EDD <<< \\u1EDC" | ||
| 421 | " << \\u1EDF <<< \\u1EDE" | ||
| 422 | " << \\u1EE1 <<< \\u1EE0" | ||
| 423 | " << \\u1EDB <<< \\u1EDA" | ||
| 424 | " << \\u1EE3 <<< \\u1EE2" | ||
| 425 | " &U << \\u00F9 <<< \\u00D9" /* U */ | ||
| 426 | " << \\u1EE7 <<< \\u1EE6" | ||
| 427 | " << \\u0169 <<< \\u0168" | ||
| 428 | " << \\u00FA <<< \\u00DA" | ||
| 429 | " << \\u1EE5 <<< \\u1EE4" | ||
| 430 | " < \\u01B0 <<< \\u01AF" /* U WITH HORN */ | ||
| 431 | " << \\u1EEB <<< \\u1EEA" | ||
| 432 | " << \\u1EED <<< \\u1EEC" | ||
| 433 | " << \\u1EEF <<< \\u1EEE" | ||
| 434 | " << \\u1EE9 <<< \\u1EE8" | ||
| 435 | " << \\u1EF1 <<< \\u1EF0" | ||
| 436 | " &Y << \\u1EF3 <<< \\u1EF2" /* Y */ | ||
| 437 | " << \\u1EF7 <<< \\u1EF6" | ||
| 438 | " << \\u1EF9 <<< \\u1EF8" | ||
| 439 | " << \\u00FD <<< \\u00DD" | ||
| 440 | " << \\u1EF5 <<< \\u1EF4"; | ||
| 441 | |||
| 442 | /* German Phonebook */ | ||
| 443 | static const char de_pb_cldr_30[] = | ||
| 444 | "&AE << \\u00E4 <<< \\u00C4 " | ||
| 445 | "&OE << \\u00F6 <<< \\u00D6 " | ||
| 446 | "&UE << \\u00FC <<< \\u00DC "; | ||
| 447 | |||
| 448 | /* Icelandic */ | ||
| 449 | static const char is_cldr_30[] = | ||
| 450 | "&[before 1]b < \\u00E1 <<< \\u00C1 " | ||
| 451 | "& d << \\u0111 <<< \\u0110 < \\u00F0 <<< \\u00D0 " | ||
| 452 | "&[before 1]f < \\u00E9 <<< \\u00C9 " | ||
| 453 | "&[before 1]j < \\u00ED <<< \\u00CD " | ||
| 454 | "&[before 1]p < \\u00F3 <<< \\u00D3 " | ||
| 455 | "&[before 1]v < \\u00FA <<< \\u00DA " | ||
| 456 | "&[before 1]z < \\u00FD <<< \\u00DD " | ||
| 457 | "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " | ||
| 458 | "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " | ||
| 459 | "< \\u00E5 <<< \\u00C5"; | ||
| 460 | |||
| 461 | /* Latvian */ | ||
| 462 | static const char lv_cldr_30[] = | ||
| 463 | "&[before 1]D < \\u010D <<< \\u010C " | ||
| 464 | "&[before 1]H < \\u0123 <<< \\u0122 " | ||
| 465 | "& I << y <<< Y " | ||
| 466 | "&[before 1]L < \\u0137 <<< \\u0136 " | ||
| 467 | "&[before 1]M < \\u013C <<< \\u013B " | ||
| 468 | "&[before 1]O < \\u0146 <<< \\u0145 " | ||
| 469 | "&[before 1]S < \\u0157 <<< \\u0156 " | ||
| 470 | "&[before 1]T < \\u0161 <<< \\u0160 " | ||
| 471 | "&[before 1]\\u01B7 < \\u017E <<< \\u017D"; | ||
| 472 | |||
| 473 | /* Romanian */ | ||
| 474 | static const char ro_cldr_30[] = | ||
| 475 | "&A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 " | ||
| 476 | "&I < \\u00EE <<< \\u00CE " | ||
| 477 | "&S < \\u015F = \\u0219 <<< \\u015E = \\u0218 " | ||
| 478 | "&T < \\u0163 = \\u021B <<< \\u0162 = \\u021A"; | ||
| 479 | |||
| 480 | /* Slovenian */ | ||
| 481 | static const char sl_cldr_30[] = | ||
| 482 | "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 " | ||
| 483 | "&D < \\u0111 <<< \\u0110 " | ||
| 484 | "&S < \\u0161 <<< \\u0160 " | ||
| 485 | "&Z < \\u017E <<< \\u017D"; | ||
| 486 | |||
| 487 | /* Polish */ | ||
| 488 | static const char pl_cldr_30[] = | ||
| 489 | "&A < \\u0105 <<< \\u0104 " | ||
| 490 | "&C < \\u0107 <<< \\u0106 " | ||
| 491 | "&E < \\u0119 <<< \\u0118 " | ||
| 492 | "&L < \\u0142 <<< \\u0141 " | ||
| 493 | "&N < \\u0144 <<< \\u0143 " | ||
| 494 | "&O < \\u00F3 <<< \\u00D3 " | ||
| 495 | "&S < \\u015B <<< \\u015A " | ||
| 496 | "&Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B"; | ||
| 497 | |||
| 498 | /* Estonian */ | ||
| 499 | static const char et_cldr_30[] = | ||
| 500 | "&[before 1]T < \\u0161 <<< \\u0160 < z <<< Z " | ||
| 501 | "< \\u017E <<< \\u017D " | ||
| 502 | "&[before 1]X < \\u00F5 <<< \\u00D5 < \\u00E4 <<< \\u00C4 " | ||
| 503 | "< \\u00F6 <<< \\u00D6 < \\u00FC <<< \\u00DC"; | ||
| 504 | |||
| 505 | /* Swedish */ | ||
| 506 | static const char sv_cldr_30[] = | ||
| 507 | "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 " | ||
| 508 | "& t <<< \\u00FE/h " | ||
| 509 | "& T <<< \\u00DE/H " | ||
| 510 | "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 " | ||
| 511 | "&[before 1]\\u01C0 < \\u00E5 <<< \\u00C5 < \\u00E4 <<< \\u00C4 " | ||
| 512 | "<< \\u00E6 <<< \\u00C6 << \\u0119 <<< \\u0118 " | ||
| 513 | "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " | ||
| 514 | "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 " | ||
| 515 | "<< \\u00F4 <<< \\u00D4"; | ||
| 516 | |||
| 517 | /* Turkish */ | ||
| 518 | static const char tr_cldr_30[] = | ||
| 519 | "& C < \\u00E7 <<< \\u00C7 " | ||
| 520 | "& G < \\u011F <<< \\u011E " | ||
| 521 | "&[before 1]i < \\u0131 <<< I " | ||
| 522 | "& i <<< \\u0130 " | ||
| 523 | "& O < \\u00F6 <<< \\u00D6 " | ||
| 524 | "& S < \\u015F <<< \\u015E " | ||
| 525 | "& U < \\u00FC <<< \\u00DC "; | ||
| 526 | |||
| 527 | /* Czech */ | ||
| 528 | static const char cs_cldr_30[] = | ||
| 529 | "&C < \\u010D <<< \\u010C " | ||
| 530 | "&H < ch <<< cH <<< Ch <<< CH " | ||
| 531 | "&R < \\u0159 <<< \\u0158" | ||
| 532 | "&S < \\u0161 <<< \\u0160" | ||
| 533 | "&Z < \\u017E <<< \\u017D"; | ||
| 534 | |||
| 535 | /* Danish, same for Norwegian */ | ||
| 536 | static const char da_cldr_30[] = | ||
| 537 | "& D << \\u0111 <<< \\u0110 << \\u00F0 <<< \\u00D0 " | ||
| 538 | "& t <<< \\u00FE/h " | ||
| 539 | "& T <<< \\u00DE/H " | ||
| 540 | "& Y << \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 " | ||
| 541 | "&[before 1]\\u01C0 < \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " | ||
| 542 | "< \\u00F8 <<< \\u00D8 << \\u00F6 <<< \\u00D6 " | ||
| 543 | "<< \\u0151 <<< \\u0150 << \\u0153 <<< \\u0152 " | ||
| 544 | "< \\u00E5 <<< \\u00C5 <<< aa <<< Aa " | ||
| 545 | "<<< AA"; | ||
| 546 | |||
| 547 | static Coll_param da_coll_param = {nullptr, false, CASE_FIRST_UPPER}; | ||
| 548 | |||
| 549 | /* CASE FIRST OFF for Norwegian */ | ||
| 550 | static Coll_param no_coll_param = {nullptr, false, CASE_FIRST_OFF}; | ||
| 551 | |||
| 552 | /* Lithuanian */ | ||
| 553 | static const char lt_cldr_30[] = | ||
| 554 | "&\\u0300 = \\u0307\\u0300 " | ||
| 555 | "&\\u0301 = \\u0307\\u0301 " | ||
| 556 | "&\\u0303 = \\u0307\\u0303 " | ||
| 557 | "&A << \\u0105 <<< \\u0104 " | ||
| 558 | "&C < \\u010D <<< \\u010C " | ||
| 559 | "&E << \\u0119 <<< \\u0118 << \\u0117 <<< \\u0116" | ||
| 560 | "&I << \\u012F <<< \\u012E << y <<< Y " | ||
| 561 | "&S < \\u0161 <<< \\u0160 " | ||
| 562 | "&U << \\u0173 <<< \\u0172 << \\u016B <<< \\u016A " | ||
| 563 | "&Z < \\u017E <<< \\u017D"; | ||
| 564 | |||
| 565 | /* Slovak */ | ||
| 566 | static const char sk_cldr_30[] = | ||
| 567 | "&A < \\u00E4 <<< \\u00C4 " | ||
| 568 | "&C < \\u010D <<< \\u010C " | ||
| 569 | "&H < ch <<< cH <<< Ch <<< CH " | ||
| 570 | "&O < \\u00F4 <<< \\u00D4 " | ||
| 571 | "&R < \\u0159 <<< \\u0158 " | ||
| 572 | "&S < \\u0161 <<< \\u0160 " | ||
| 573 | "&Z < \\u017E <<< \\u017D"; | ||
| 574 | |||
| 575 | /* Spanish (Traditional) */ | ||
| 576 | static const char es_trad_cldr_30[] = | ||
| 577 | "&N < \\u00F1 <<< \\u00D1 " | ||
| 578 | "&C < ch <<< Ch <<< CH " | ||
| 579 | "&l < ll <<< Ll <<< LL"; | ||
| 580 | |||
| 581 | /* Persian */ | ||
| 582 | #if 0 | ||
| 583 | static const char fa_cldr_30[]= | ||
| 584 | "& \\u064E << \\u0650 << \\u064F << \\u064B << \\u064D " | ||
| 585 | "<< \\u064C " | ||
| 586 | "&[before 1]\\u0627 < \\u0622 " | ||
| 587 | "& \\u0627 << \\u0671 < \\u0621 << \\u0623 << \\u0672 " | ||
| 588 | "<< \\u0625 << \\u0673 << \\u0624 << \\u06CC\\u0654 " | ||
| 589 | "<<< \\u0649\\u0654 <<< \\u0626 " | ||
| 590 | "& \\u06A9 << \\u06AA << \\u06AB << \\u0643 << \\u06AC " | ||
| 591 | "<< \\u06AD << \\u06AE " | ||
| 592 | "& \\u06CF < \\u0647 << \\u06D5 << \\u06C1 << \\u0629 " | ||
| 593 | "<< \\u06C3 << \\u06C0 << \\u06BE " | ||
| 594 | "& \\u06CC << \\u0649 << \\u06D2 << \\u064A << \\u06D0 " | ||
| 595 | "<< \\u06D1 << \\u06CD << \\u06CE"; | ||
| 596 | |||
| 597 | static Reorder_param fa_reorder_param= { | ||
| 598 | {CHARGRP_ARAB, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0 | ||
| 599 | }; | ||
| 600 | |||
| 601 | static Coll_param fa_coll_param= { | ||
| 602 | &fa_reorder_param, true | ||
| 603 | }; | ||
| 604 | #endif | ||
| 605 | |||
| 606 | /* Hungarian */ | ||
| 607 | static const char hu_cldr_30[] = | ||
| 608 | "&C < cs <<< Cs <<< CS " | ||
| 609 | "&D < dz <<< Dz <<< DZ " | ||
| 610 | "&DZ < dzs <<< Dzs <<< DZS " | ||
| 611 | "&G < gy <<< Gy <<< GY " | ||
| 612 | "&L < ly <<< Ly <<< LY " | ||
| 613 | "&N < ny <<< Ny <<< NY " | ||
| 614 | "&S < sz <<< Sz <<< SZ " | ||
| 615 | "&T < ty <<< Ty <<< TY " | ||
| 616 | "&Z < zs <<< Zs <<< ZS " | ||
| 617 | "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150 " | ||
| 618 | "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170 " | ||
| 619 | "&cs <<< ccs/cs " | ||
| 620 | "&Cs <<< Ccs/cs " | ||
| 621 | "&CS <<< CCS/CS " | ||
| 622 | "&dz <<< ddz/dz " | ||
| 623 | "&Dz <<< Ddz/dz " | ||
| 624 | "&DZ <<< DDZ/DZ " | ||
| 625 | "&dzs<<< ddzs/dzs " | ||
| 626 | "&Dzs<<< Ddzs/dzs " | ||
| 627 | "&DZS<<< DDZS/DZS " | ||
| 628 | "&gy <<< ggy/gy " | ||
| 629 | "&Gy <<< Ggy/gy " | ||
| 630 | "&GY <<< GGY/GY " | ||
| 631 | "&ly <<< lly/ly " | ||
| 632 | "&Ly <<< Lly/ly " | ||
| 633 | "&LY <<< LLY/LY " | ||
| 634 | "&ny <<< nny/ny " | ||
| 635 | "&Ny <<< Nny/ny " | ||
| 636 | "&NY <<< NNY/NY " | ||
| 637 | "&sz <<< ssz/sz " | ||
| 638 | "&Sz <<< Ssz/sz " | ||
| 639 | "&SZ <<< SSZ/SZ " | ||
| 640 | "&ty <<< tty/ty " | ||
| 641 | "&Ty <<< Tty/ty " | ||
| 642 | "&TY <<< TTY/TY " | ||
| 643 | "&zs <<< zzs/zs " | ||
| 644 | "&Zs <<< Zzs/zs " | ||
| 645 | "&ZS <<< ZZS/ZS"; | ||
| 646 | |||
| 647 | /* Croatian, same for Serbian with Latin and Bosnian. */ | ||
| 648 | static const char hr_cldr_30[] = | ||
| 649 | "&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 " | ||
| 650 | "&D < d\\u017E <<< \\u01C6 <<< D\\u017E <<< \\u01C5 <<< D\\u017D " | ||
| 651 | "<<< \\u01C4 < \\u0111 <<< \\u0110 " | ||
| 652 | "&L < lj <<< \\u01C9 <<< Lj <<< \\u01C8 <<< LJ " | ||
| 653 | "<<< \\u01C7 " | ||
| 654 | "&N < nj <<< \\u01CC <<< Nj <<< \\u01CB <<< NJ " | ||
| 655 | "<<< \\u01CA " | ||
| 656 | "&S < \\u0161 <<< \\u0160 " | ||
| 657 | "&Z < \\u017E <<< \\u017D "; | ||
| 658 | |||
| 659 | static Reorder_param hr_reorder_param = { | ||
| 660 | {CHARGRP_LATIN, CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0}; | ||
| 661 | |||
| 662 | static Coll_param hr_coll_param = {&hr_reorder_param, false, CASE_FIRST_OFF}; | ||
| 663 | |||
| 664 | /* Sinhala */ | ||
| 665 | #if 0 | ||
| 666 | static const char si_cldr_30[]= | ||
| 667 | "&\\u0D96 < \\u0D82 < \\u0D83 " | ||
| 668 | "&\\u0DA5 < \\u0DA4"; | ||
| 669 | #endif | ||
| 670 | |||
| 671 | /* Vietnamese */ | ||
| 672 | static const char vi_cldr_30[] = | ||
| 673 | "&\\u0300 << \\u0309 << \\u0303 << \\u0301 << \\u0323 " | ||
| 674 | "&a < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 " | ||
| 675 | "&d < \\u0111 <<< \\u0110 " | ||
| 676 | "&e < \\u00EA <<< \\u00CA " | ||
| 677 | "&o < \\u00F4 <<< \\u00D4 < \\u01A1 <<< \\u01A0 " | ||
| 678 | "&u < \\u01B0 <<< \\u01AF"; | ||
| 679 | |||
| 680 | static Coll_param vi_coll_param = {nullptr, true, CASE_FIRST_OFF}; | ||
| 681 | |||
| 682 | static Reorder_param ja_reorder_param = { | ||
| 683 | /* | ||
| 684 | Per CLDR 30, Japanese reorder rule is defined as [Latn Kana Hani], | ||
| 685 | but for Hani characters, their weight is implicit according to UCA, | ||
| 686 | which is different from other character groups. We don't add "Hani" | ||
| 687 | below and will have special handling for them in | ||
| 688 | adjust_japanese_weight() and apply_reorder_param(). Implicit weight | ||
| 689 | has two collation elements. To make strnxfrm() run faster, we give | ||
| 690 | Japanese Han characters tailored weight which has only one collation | ||
| 691 | element. These characters' weight is defined in ja_han_pages. | ||
| 692 | */ | ||
| 693 | {CHARGRP_LATIN, CHARGRP_KANA, CHARGRP_NONE}, | ||
| 694 | {{{0, 0}, {0, 0}}}, | ||
| 695 | 0, | ||
| 696 | 0}; | ||
| 697 | |||
| 698 | static Coll_param ja_coll_param = {&ja_reorder_param, false /*norm_enabled*/, | ||
| 699 | CASE_FIRST_OFF}; | ||
| 700 | |||
| 701 | /* | ||
| 702 | The Chinese reorder rule is defined as [Hani]. This means all Han characters' | ||
| 703 | weight should be greater than the core group and smaller than any other | ||
| 704 | character groups. | ||
| 705 | The Han characters are separated into two parts. The CLDR collation | ||
| 706 | definition file, zh.xml, defines 41336 Han characters' order, and all other | ||
| 707 | Han characters have implicit weight. | ||
| 708 | Since the core group characters occupy the weight values from 0x0209 to 0x1C46 | ||
| 709 | in DUCET, we decided to set the weight of all Han characters defined in | ||
| 710 | zh.xml to be the value from 0x1C47 to 0xBDBE. The smallest weight value of | ||
| 711 | these Han characters, 0x1C47, being the largest weight value of the core | ||
| 712 | group plus one (0x1C46 + 1), ensures these Han characters sort greater than | ||
| 713 | the core group characters. | ||
| 714 | Also, we set the implicit weight to the Han characters like | ||
| 715 | [BDBF - BDC3, 0020, 0002][XXXX, 0000, 0000]. | ||
| 716 | To tailor the weight of characters of Latin, Cyrillic and so on to be bigger | ||
| 717 | than all Han characters, we give these characters weights from 0xBDC4 to | ||
| 718 | 0xF620. There are many character groups between the core group and the Han | ||
| 719 | group, so it would be a long list if we put them in the following reorder_grp | ||
| 720 | structure. But since it is a very simple weight shift, we put their calculated | ||
| 721 | weight here and do not calculate it in my_prepare_reorder(). | ||
| 722 | |||
| 723 | NOTE: We use the zh.xml file from CLDR v33.1 to implement this Chinese | ||
| 724 | collation, because we found that the file of CLDR v30 is missing some very | ||
| 725 | common Han characters (the Han character 'small', etc). | ||
| 726 | */ | ||
| 727 | static Reorder_param zh_reorder_param = { | ||
| 728 | {CHARGRP_NONE}, {{{0x1C47, 0x54A3}, {0xBDC4, 0xF620}}}, 1, 0x54A3}; | ||
| 729 | |||
| 730 | static Coll_param zh_coll_param = {&zh_reorder_param, false, CASE_FIRST_OFF}; | ||
| 731 | |||
| 732 | /* Russian, same for Bulgarian and Mongolian with Cyrillic letters */ | ||
| 733 | static Reorder_param ru_reorder_param = { | ||
| 734 | {CHARGRP_CYRILLIC, CHARGRP_NONE}, {{{0, 0}, {0, 0}}}, 0, 0}; | ||
| 735 | |||
| 736 | static Coll_param ru_coll_param = {&ru_reorder_param, false /*norm_enabled*/, | ||
| 737 | CASE_FIRST_OFF}; | ||
| 738 | |||
| 739 | static constexpr uint16 nochar[] = {0, 0}; | ||
| 740 | |||
| 741 | /** | ||
| 742 | Unicode Collation Algorithm: | ||
| 743 | Collation element (weight) scanner, | ||
| 744 | for sequential scanning of collation | ||
| 745 | weights from a string. | ||
| 746 | |||
| 747 | Only meant as a base class; instantiate uca_scanner_any or uca_scanner_900 | ||
| 748 | instead of this. | ||
| 749 | */ | ||
| 750 | class my_uca_scanner { | ||
| 751 | protected: | ||
| 752 | 11076031406 | my_uca_scanner(const CHARSET_INFO *cs_arg, const uchar *str, size_t length) | |
| 753 | 11076031406 | : wbeg(nochar), | |
| 754 | 11076031406 | sbeg(str), | |
| 755 | 11076031406 | send(str + length), | |
| 756 | 11076031406 | uca(cs_arg->uca), | |
| 757 | 11076031406 | cs(cs_arg), | |
| 758 | 11076031406 | sbeg_dup(str) {} | |
| 759 | |||
| 760 | public: | ||
| 761 | /** | ||
| 762 | Get the level the scanner is currently working on. The string | ||
| 763 | can be scanned multiple times (if the collation requires multi-level | ||
| 764 | comparisons, e.g. for accent or case sensitivity); first to get | ||
| 765 | primary weights, then from the start again for secondary, etc. | ||
| 766 | */ | ||
| 767 | 38020806387 | uint get_weight_level() const { return weight_lv; } | |
| 768 | |||
| 769 | protected: | ||
| 770 | uint weight_lv{0}; /* 0 = Primary, 1 = Secondary, 2 = Tertiary */ | ||
| 771 | const uint16 *wbeg; /* Beginning of the current weight string */ | ||
| 772 | uint wbeg_stride{0}; /* Number of bytes between weights in string */ | ||
| 773 | const uchar *sbeg; /* Beginning of the input string */ | ||
| 774 | const uchar *send; /* End of the input string */ | ||
| 775 | const MY_UCA_INFO *uca; | ||
| 776 | uint16 implicit[10]; | ||
| 777 | my_wc_t prev_char{0}; // Previous code point we scanned, if any. | ||
| 778 | const CHARSET_INFO *cs; | ||
| 779 | uint num_of_ce_left{0}; | ||
| 780 | const uchar *sbeg_dup; /* Backup of beginning of input string */ | ||
| 781 | |||
| 782 | protected: | ||
| 783 | const uint16 *contraction_find(my_wc_t wc0, size_t *chars_skipped); | ||
| 784 | inline const uint16 *previous_context_find(my_wc_t wc0, my_wc_t wc1); | ||
| 785 | }; | ||
| 786 | |||
| 787 | /* | ||
| 788 | Charset dependent scanner part, to optimize | ||
| 789 | some character sets. | ||
| 790 | */ | ||
| 791 | |||
| 792 | template <class Mb_wc> | ||
| 793 | struct uca_scanner_any : public my_uca_scanner { | ||
| 794 | 508351708 | uca_scanner_any(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg, | |
| 795 | const uchar *str, size_t length) | ||
| 796 | 508351708 | : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) { | |
| 797 | // UCA 9.0.0 uses a different table format from what this scanner expects. | ||
| 798 |
2/4✓ Branch 0 taken 254175871 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 254175871 times.
|
508351724 | assert(cs_arg->uca == nullptr || cs_arg->uca->version != UCA_V900); |
| 799 | 508351724 | } | |
| 800 | |||
| 801 | 1014477204 | uint get_char_index() const { return char_index; } | |
| 802 | |||
| 803 | inline int next(); | ||
| 804 | |||
| 805 | private: | ||
| 806 | /** | ||
| 807 | How many code points (possibly multibyte) we have scanned so far. | ||
| 808 | This includes code points with zero weight. Note that this is reset | ||
| 809 | once we get to the end of the string and restart the scanning for | ||
| 810 | the next weight level, but it is _not_ reset when we reach the | ||
| 811 | end of the last level. | ||
| 812 | */ | ||
| 813 | uint char_index{0}; | ||
| 814 | |||
| 815 | const Mb_wc mb_wc; | ||
| 816 | |||
| 817 | inline int next_implicit(my_wc_t ch); | ||
| 818 | }; | ||
| 819 | |||
| 820 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 821 | class uca_scanner_900 : public my_uca_scanner { | ||
| 822 | public: | ||
| 823 | 21643811440 | uca_scanner_900(const Mb_wc mb_wc, const CHARSET_INFO *cs_arg, | |
| 824 | const uchar *str, size_t length) | ||
| 825 | 21643811440 | : my_uca_scanner(cs_arg, str, length), mb_wc(mb_wc) {} | |
| 826 | |||
| 827 | inline int next(); | ||
| 828 | |||
| 829 | /** | ||
| 830 | For each weight in sequence, call "func", which should have | ||
| 831 | a function signature of "bool func(int weight, bool is_level_separator)". | ||
| 832 | Stops the iteration early if "func" returns false. | ||
| 833 | |||
| 834 | This is morally equivalent to | ||
| 835 | |||
| 836 | int weight; | ||
| 837 | while ((weight= next()) >= 0) | ||
| 838 | { | ||
| 839 | if (!func(weight, weight == 0)) break; | ||
| 840 | } | ||
| 841 | |||
| 842 | except that it might employ optimizations internally to speed up | ||
| 843 | the process. These optimizations will not modify the number of calls | ||
| 844 | to func() (or their order), but might affect the internal scanner | ||
| 845 | state during the calls, so func() should not try to read from | ||
| 846 | the scanner except by calling public member functions. | ||
| 847 | |||
| 848 | As a special optimization, if "bool preaccept_data(int num_weights)" | ||
| 849 | returns true, the next "num_weights" calls to func() _must_ return | ||
| 850 | true. This is so that bounds checking costs can be amortized | ||
| 851 | over fewer calls. | ||
| 852 | */ | ||
| 853 | template <class T, class U> | ||
| 854 | inline void for_each_weight(T func, U preaccept_data); | ||
| 855 | |||
| 856 | private: | ||
| 857 | const Mb_wc mb_wc; | ||
| 858 | |||
| 859 | inline int next_raw(); | ||
| 860 | inline int more_weight(); | ||
| 861 | uint16 apply_case_first(uint16 weight); | ||
| 862 | uint16 apply_reorder_param(uint16 weight); | ||
| 863 | inline int next_implicit(my_wc_t ch); | ||
| 864 | void my_put_jamo_weights(my_wc_t *hangul_jamo, int jamo_cnt); | ||
| 865 | /* | ||
| 866 | apply_reorder_param() needs to return two weights for each origin | ||
| 867 | weight. This boolean signals whether we have already returned the | ||
| 868 | FB86 weight, and are ready to return the origin weight. | ||
| 869 | */ | ||
| 870 | bool return_origin_weight{true}; | ||
| 871 | /* | ||
| 872 | For Japanese kana-sensitive collation, we only add quaternary | ||
| 873 | weight for katakana and hiragana, but not for others like latin | ||
| 874 | and kanji, because characters like latin and kanji can be already | ||
| 875 | distinguished from kana by three levels of weight. | ||
| 876 | has_quaternary_weight is to indicate whether quaternary weight is | ||
| 877 | needed for characters in string. | ||
| 878 | */ | ||
| 879 | bool has_quaternary_weight{false}; | ||
| 880 | int handle_ja_contraction_quat_wt(); | ||
| 881 | int handle_ja_common_quat_wt(my_wc_t wc); | ||
| 882 | }; | ||
| 883 | |||
| 884 | /********** Helper functions to handle contraction ************/ | ||
| 885 | |||
| 886 | /** | ||
| 887 | Mark a code point as a contraction part | ||
| 888 | |||
| 889 | @param flags Pointer to UCA contraction flag data | ||
| 890 | @param wc Unicode code point | ||
| 891 | @param flag flag: "is contraction head", "is contraction tail" | ||
| 892 | */ | ||
| 893 | |||
| 894 | 546791 | static inline void my_uca_add_contraction_flag(char *flags, my_wc_t wc, | |
| 895 | int flag) { | ||
| 896 | 546791 | flags[wc & MY_UCA_CNT_FLAG_MASK] |= flag; | |
| 897 | 546791 | } | |
| 898 | |||
| 899 | /** | ||
| 900 | Check if UCA level data has contractions. | ||
| 901 | |||
| 902 | @param uca Pointer to UCA data | ||
| 903 | |||
| 904 | @return Whether the UCA data has contractions | ||
| 905 | @retval false - no contractions | ||
| 906 | @retval true - there are some contractions | ||
| 907 | */ | ||
| 908 | |||
| 909 | 37803780609 | static inline bool my_uca_have_contractions(const MY_UCA_INFO *uca) { | |
| 910 | 37803780609 | return uca->have_contractions; | |
| 911 | } | ||
| 912 | |||
| 913 | struct trie_node_cmp { | ||
| 914 | 3021597 | bool operator()(const MY_CONTRACTION &a, const my_wc_t b) { return a.ch < b; } | |
| 915 | bool operator()(const MY_CONTRACTION &a, const MY_CONTRACTION &b) { | ||
| 916 | return a.ch < b.ch; | ||
| 917 | } | ||
| 918 | }; | ||
| 919 | |||
| 920 | static std::vector<MY_CONTRACTION>::const_iterator | ||
| 921 | 489459 | find_contraction_part_in_trie(const std::vector<MY_CONTRACTION> &cont_nodes, | |
| 922 | my_wc_t ch) { | ||
| 923 |
2/2✓ Branch 0 taken 21062 times.
✓ Branch 1 taken 468397 times.
|
489459 | if (cont_nodes.empty()) return cont_nodes.end(); |
| 924 | 468397 | return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch, | |
| 925 | 468397 | trie_node_cmp()); | |
| 926 | } | ||
| 927 | |||
| 928 | 549253 | static std::vector<MY_CONTRACTION>::iterator find_contraction_part_in_trie( | |
| 929 | std::vector<MY_CONTRACTION> &cont_nodes, my_wc_t ch) { | ||
| 930 |
2/2✓ Branch 0 taken 52185 times.
✓ Branch 1 taken 497068 times.
|
549253 | if (cont_nodes.empty()) return cont_nodes.end(); |
| 931 | 497068 | return std::lower_bound(cont_nodes.begin(), cont_nodes.end(), ch, | |
| 932 | 497068 | trie_node_cmp()); | |
| 933 | } | ||
| 934 | /** | ||
| 935 | Find a contraction consisting of two code points and return its weight array | ||
| 936 | |||
| 937 | @param cont_nodes Vector that contains contraction nodes | ||
| 938 | @param wc1 First code point | ||
| 939 | @param wc2 Second code point | ||
| 940 | |||
| 941 | @return Weight array | ||
| 942 | @retval NULL - no contraction found | ||
| 943 | @retval ptr - contraction weight array | ||
| 944 | */ | ||
| 945 | |||
| 946 | 199 | const uint16 *my_uca_contraction2_weight( | |
| 947 | const std::vector<MY_CONTRACTION> *cont_nodes, my_wc_t wc1, my_wc_t wc2) { | ||
| 948 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 199 times.
|
199 | if (!cont_nodes) return nullptr; |
| 949 | |||
| 950 |
1/2✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
|
199 | if (!cont_nodes->empty()) { |
| 951 | std::vector<MY_CONTRACTION>::const_iterator node_it1 = | ||
| 952 |
1/2✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
|
199 | find_contraction_part_in_trie(*cont_nodes, wc1); |
| 953 |
3/6✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 199 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 199 times.
|
398 | if (node_it1 == cont_nodes->end() || node_it1->ch != wc1) return nullptr; |
| 954 | std::vector<MY_CONTRACTION>::const_iterator node_it2 = | ||
| 955 |
1/2✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
|
199 | find_contraction_part_in_trie(node_it1->child_nodes, wc2); |
| 956 |
3/6✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 199 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 199 times.
✗ Branch 5 not taken.
|
398 | if (node_it2 != node_it1->child_nodes.end() && node_it2->ch == wc2 && |
| 957 |
1/2✓ Branch 0 taken 199 times.
✗ Branch 1 not taken.
|
199 | node_it2->is_contraction_tail) |
| 958 | 199 | return node_it2->weight; | |
| 959 | } | ||
| 960 | ✗ | return nullptr; | |
| 961 | } | ||
| 962 | |||
| 963 | /** | ||
| 964 | Check if a code point can be previous context head | ||
| 965 | |||
| 966 | @param flags Pointer to UCA contraction flag data | ||
| 967 | @param wc Code point | ||
| 968 | |||
| 969 | @retval false - cannot be previous context head | ||
| 970 | @retval true - can be previous context head | ||
| 971 | */ | ||
| 972 | |||
| 973 | 27040 | static inline bool my_uca_can_be_previous_context_head(const char *flags, | |
| 974 | my_wc_t wc) { | ||
| 975 | 27040 | return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD; | |
| 976 | } | ||
| 977 | |||
| 978 | /** | ||
| 979 | Check if a code point can be previous context tail | ||
| 980 | |||
| 981 | @param flags Pointer to UCA contraction flag data | ||
| 982 | @param wc Code point | ||
| 983 | |||
| 984 | @retval false - cannot be previous context tail | ||
| 985 | @retval true - can be previous context tail | ||
| 986 | */ | ||
| 987 | |||
| 988 | 182990785 | static inline bool my_uca_can_be_previous_context_tail(const char *flags, | |
| 989 | my_wc_t wc) { | ||
| 990 | 182990785 | return flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL; | |
| 991 | } | ||
| 992 | |||
| 993 | /** | ||
| 994 | Check if a string is a contraction of exactly the given length, | ||
| 995 | and return its weight array on success. | ||
| 996 | |||
| 997 | @param cont_nodes Vector that contains contraction nodes | ||
| 998 | @param wc Pointer to wide string | ||
| 999 | @param len String length | ||
| 1000 | |||
| 1001 | @return Weight array | ||
| 1002 | @retval NULL - Input string is not a known contraction | ||
| 1003 | @retval ptr - contraction weight array | ||
| 1004 | */ | ||
| 1005 | |||
| 1006 | 376988 | static inline const uint16 *my_uca_contraction_weight( | |
| 1007 | const std::vector<MY_CONTRACTION> *cont_nodes, const my_wc_t *wc, | ||
| 1008 | size_t len) { | ||
| 1009 |
2/2✓ Branch 0 taken 180420 times.
✓ Branch 1 taken 196568 times.
|
376988 | if (!cont_nodes) return nullptr; |
| 1010 | |||
| 1011 | 196568 | std::vector<MY_CONTRACTION>::const_iterator node_it; | |
| 1012 |
2/2✓ Branch 0 taken 270557 times.
✓ Branch 1 taken 20987 times.
|
291544 | for (size_t ch_ind = 0; ch_ind < len; ++ch_ind) { |
| 1013 |
1/2✓ Branch 0 taken 270557 times.
✗ Branch 1 not taken.
|
270557 | node_it = find_contraction_part_in_trie(*cont_nodes, wc[ch_ind]); |
| 1014 |
6/6✓ Branch 0 taken 200743 times.
✓ Branch 1 taken 69814 times.
✓ Branch 2 taken 105767 times.
✓ Branch 3 taken 94976 times.
✓ Branch 4 taken 175581 times.
✓ Branch 5 taken 94976 times.
|
270557 | if (node_it == cont_nodes->end() || node_it->ch != wc[ch_ind]) |
| 1015 | 175581 | return nullptr; | |
| 1016 | 94976 | cont_nodes = &node_it->child_nodes; | |
| 1017 | } | ||
| 1018 |
2/2✓ Branch 0 taken 20027 times.
✓ Branch 1 taken 960 times.
|
20987 | if (node_it->is_contraction_tail) return node_it->weight; |
| 1019 | 960 | return nullptr; | |
| 1020 | } | ||
| 1021 | |||
| 1022 | /** | ||
| 1023 | Return length of a 0-terminated wide string, analogous to strnlen(). | ||
| 1024 | |||
| 1025 | @param s Pointer to wide string | ||
| 1026 | @param maxlen Maximum string length | ||
| 1027 | |||
| 1028 | @return string length, or maxlen if no '\0' is met. | ||
| 1029 | */ | ||
| 1030 | 2813640 | static size_t my_wstrnlen(my_wc_t *s, size_t maxlen) { | |
| 1031 |
2/2✓ Branch 0 taken 6517219 times.
✓ Branch 1 taken 1081 times.
|
6518300 | for (size_t i = 0; i < maxlen; i++) { |
| 1032 |
2/2✓ Branch 0 taken 2812559 times.
✓ Branch 1 taken 3704660 times.
|
6517219 | if (s[i] == 0) return i; |
| 1033 | } | ||
| 1034 | 1081 | return maxlen; | |
| 1035 | } | ||
| 1036 | |||
| 1037 | /** | ||
| 1038 | Find a contraction in the input stream and return its weight array | ||
| 1039 | |||
| 1040 | Scan input code points to find the longest path in the contraction trie | ||
| 1041 | which contains all these code points. If the ending node of this | ||
| 1042 | path is end of contraction, return the weight array. | ||
| 1043 | |||
| 1044 | @param wc0 The first code point of the contraction (which should have | ||
| 1045 | the MY_UCA_CNT_HEAD flag). | ||
| 1046 | @param[out] chars_skipped How many code points were skipped in the | ||
| 1047 | contraction we found. Only makes sense if we actually found one. | ||
| 1048 | |||
| 1049 | @return Weight array | ||
| 1050 | @retval NULL no contraction found | ||
| 1051 | @retval ptr contraction weight array | ||
| 1052 | */ | ||
| 1053 | |||
| 1054 | 201097 | const uint16 *my_uca_scanner::contraction_find(my_wc_t wc0, | |
| 1055 | size_t *chars_skipped) { | ||
| 1056 | 201097 | const uchar *beg = nullptr; | |
| 1057 | 201097 | auto mb_wc = cs->cset->mb_wc; | |
| 1058 | |||
| 1059 | 201097 | const uchar *s = sbeg; | |
| 1060 | 201097 | const std::vector<MY_CONTRACTION> *cont_nodes = uca->contraction_nodes; | |
| 1061 | 201097 | const MY_CONTRACTION *longest_contraction = nullptr; | |
| 1062 | 201097 | std::vector<MY_CONTRACTION>::const_iterator node_it; | |
| 1063 | for (;;) { | ||
| 1064 |
1/2✓ Branch 0 taken 216042 times.
✗ Branch 1 not taken.
|
216042 | node_it = find_contraction_part_in_trie(*cont_nodes, wc0); |
| 1065 |
6/6✓ Branch 0 taken 26844 times.
✓ Branch 1 taken 189198 times.
✓ Branch 2 taken 6476 times.
✓ Branch 3 taken 20368 times.
✓ Branch 4 taken 195674 times.
✓ Branch 5 taken 20368 times.
|
216042 | if (node_it == cont_nodes->end() || node_it->ch != wc0) break; |
| 1066 |
2/2✓ Branch 0 taken 3734 times.
✓ Branch 1 taken 16634 times.
|
20368 | if (node_it->is_contraction_tail) { |
| 1067 | 3734 | longest_contraction = &(*node_it); | |
| 1068 | 3734 | beg = s; | |
| 1069 | 3734 | *chars_skipped = node_it->contraction_len - 1; | |
| 1070 | } | ||
| 1071 | int mblen; | ||
| 1072 |
3/4✓ Branch 0 taken 20368 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5423 times.
✓ Branch 3 taken 14945 times.
|
20368 | if ((mblen = mb_wc(cs, &wc0, s, send)) <= 0) break; |
| 1073 | 14945 | s += mblen; | |
| 1074 | 14945 | cont_nodes = &node_it->child_nodes; | |
| 1075 | 14945 | } | |
| 1076 | |||
| 1077 |
2/2✓ Branch 0 taken 3509 times.
✓ Branch 1 taken 197588 times.
|
201097 | if (longest_contraction != nullptr) { |
| 1078 | 3509 | const uint16 *cweight = longest_contraction->weight; | |
| 1079 |
2/2✓ Branch 0 taken 890 times.
✓ Branch 1 taken 2619 times.
|
3509 | if (uca->version == UCA_V900) { |
| 1080 | 890 | cweight += weight_lv; | |
| 1081 | 890 | wbeg = cweight + MY_UCA_900_CE_SIZE; | |
| 1082 | 890 | wbeg_stride = MY_UCA_900_CE_SIZE; | |
| 1083 | 890 | num_of_ce_left = 7; | |
| 1084 | } else { | ||
| 1085 | 2619 | wbeg = cweight + 1; | |
| 1086 | 2619 | wbeg_stride = MY_UCA_900_CE_SIZE; | |
| 1087 | } | ||
| 1088 | 3509 | sbeg = beg; | |
| 1089 | 3509 | return cweight; | |
| 1090 | } | ||
| 1091 | 197588 | return nullptr; /* No contractions were found */ | |
| 1092 | } | ||
| 1093 | |||
| 1094 | /** | ||
| 1095 | Find weight for contraction with previous context | ||
| 1096 | and return its weight array. | ||
| 1097 | |||
| 1098 | @param wc0 Previous code point | ||
| 1099 | @param wc1 Current code point | ||
| 1100 | |||
| 1101 | @return Weight array | ||
| 1102 | @retval NULL - no contraction with context found | ||
| 1103 | @retval ptr - contraction weight array | ||
| 1104 | */ | ||
| 1105 | ALWAYS_INLINE | ||
| 1106 | const uint16 *my_uca_scanner::previous_context_find(my_wc_t wc0, my_wc_t wc1) { | ||
| 1107 | std::vector<MY_CONTRACTION>::const_iterator node_it1 = | ||
| 1108 | 2462 | find_contraction_part_in_trie(*uca->contraction_nodes, wc1); | |
| 1109 |
18/444✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 1613 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✓ Branch 51 taken 1613 times.
✗ Branch 52 not taken.
✓ Branch 53 taken 1613 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 639 times.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 639 times.
✗ Branch 64 not taken.
✓ Branch 65 taken 639 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✓ Branch 144 taken 56 times.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✓ Branch 147 taken 56 times.
✗ Branch 148 not taken.
✓ Branch 149 taken 56 times.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✓ Branch 264 taken 57 times.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✓ Branch 267 taken 57 times.
✗ Branch 268 not taken.
✓ Branch 269 taken 57 times.
✓ Branch 270 taken 57 times.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✓ Branch 273 taken 57 times.
✗ Branch 274 not taken.
✓ Branch 275 taken 57 times.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✓ Branch 336 taken 40 times.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✓ Branch 339 taken 40 times.
✗ Branch 340 not taken.
✓ Branch 341 taken 40 times.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
|
2462 | if (node_it1 == uca->contraction_nodes->end() || node_it1->ch != wc1) |
| 1110 | ✗ | return nullptr; | |
| 1111 | std::vector<MY_CONTRACTION>::const_iterator node_it2 = | ||
| 1112 |
6/148✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1613 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 639 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 40 times.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
|
2462 | find_contraction_part_in_trie(node_it1->child_nodes_context, wc0); |
| 1113 |
18/444✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 1613 times.
✗ Branch 49 not taken.
✓ Branch 50 taken 1613 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 1613 times.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 639 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 639 times.
✗ Branch 63 not taken.
✓ Branch 64 taken 639 times.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✓ Branch 144 taken 56 times.
✗ Branch 145 not taken.
✓ Branch 146 taken 56 times.
✗ Branch 147 not taken.
✓ Branch 148 taken 56 times.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✓ Branch 264 taken 57 times.
✗ Branch 265 not taken.
✓ Branch 266 taken 57 times.
✗ Branch 267 not taken.
✓ Branch 268 taken 57 times.
✗ Branch 269 not taken.
✓ Branch 270 taken 57 times.
✗ Branch 271 not taken.
✓ Branch 272 taken 57 times.
✗ Branch 273 not taken.
✓ Branch 274 taken 57 times.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✓ Branch 336 taken 40 times.
✗ Branch 337 not taken.
✓ Branch 338 taken 40 times.
✗ Branch 339 not taken.
✓ Branch 340 taken 40 times.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
|
2462 | if (node_it2 != node_it1->child_nodes_context.end() && node_it2->ch == wc0) { |
| 1114 |
6/148✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1613 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 639 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✓ Branch 113 taken 40 times.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
|
2462 | if (uca->version == UCA_V900) { |
| 1115 | 2422 | wbeg = node_it2->weight + MY_UCA_900_CE_SIZE + weight_lv; | |
| 1116 | 2422 | wbeg_stride = MY_UCA_900_CE_SIZE; | |
| 1117 | 2422 | num_of_ce_left = 7; | |
| 1118 | } else { | ||
| 1119 | 40 | wbeg = node_it2->weight + 1; | |
| 1120 | 40 | wbeg_stride = MY_UCA_900_CE_SIZE; | |
| 1121 | } | ||
| 1122 | 2462 | return node_it2->weight + weight_lv; | |
| 1123 | } | ||
| 1124 | ✗ | return nullptr; | |
| 1125 | } | ||
| 1126 | |||
| 1127 | /****************************************************************/ | ||
| 1128 | #define HANGUL_JAMO_MAX_LENGTH 3 | ||
| 1129 | /** | ||
| 1130 | Check if a code point is Hangul syllable. Decompose it to jamos | ||
| 1131 | if it is, and return tailored weights. | ||
| 1132 | |||
| 1133 | @param syllable Hangul syllable to be decomposed | ||
| 1134 | @param[out] jamo Corresponding jamos | ||
| 1135 | |||
| 1136 | @return 0 The code point is not Hangul syllable | ||
| 1137 | or cannot be decomposed | ||
| 1138 | others The number of jamos returned | ||
| 1139 | */ | ||
| 1140 | 408141570 | static int my_decompose_hangul_syllable(my_wc_t syllable, my_wc_t *jamo) { | |
| 1141 |
4/4✓ Branch 0 taken 398320029 times.
✓ Branch 1 taken 9821541 times.
✓ Branch 2 taken 394125971 times.
✓ Branch 3 taken 4194058 times.
|
408141570 | if (syllable < 0xAC00 || syllable > 0xD7AF) return 0; |
| 1142 | 4194058 | constexpr uint syllable_base = 0xAC00; | |
| 1143 | 4194058 | constexpr uint leadingjamo_base = 0x1100; | |
| 1144 | 4194058 | constexpr uint voweljamo_base = 0x1161; | |
| 1145 | 4194058 | constexpr uint trailingjamo_base = 0x11A7; | |
| 1146 | 4194058 | constexpr uint voweljamo_cnt = 21; | |
| 1147 | 4194058 | constexpr uint trailingjamo_cnt = 28; | |
| 1148 | 4194058 | const uint syllable_index = syllable - syllable_base; | |
| 1149 | 4194058 | const uint v_t_combination = voweljamo_cnt * trailingjamo_cnt; | |
| 1150 | 4194058 | const uint leadingjamo_index = syllable_index / v_t_combination; | |
| 1151 | 4194058 | const uint voweljamo_index = | |
| 1152 | 4194058 | (syllable_index % v_t_combination) / trailingjamo_cnt; | |
| 1153 | 4194058 | const uint trailingjamo_index = syllable_index % trailingjamo_cnt; | |
| 1154 | 4194058 | jamo[0] = leadingjamo_base + leadingjamo_index; | |
| 1155 | 4194058 | jamo[1] = voweljamo_base + voweljamo_index; | |
| 1156 |
2/2✓ Branch 0 taken 4043934 times.
✓ Branch 1 taken 150124 times.
|
4194058 | jamo[2] = trailingjamo_index ? (trailingjamo_base + trailingjamo_index) : 0; |
| 1157 |
2/2✓ Branch 0 taken 4043934 times.
✓ Branch 1 taken 150124 times.
|
4194058 | return trailingjamo_index ? 3 : 2; |
| 1158 | } | ||
| 1159 | |||
| 1160 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1161 | 8388116 | void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::my_put_jamo_weights( | |
| 1162 | my_wc_t *hangul_jamo, int jamo_cnt) { | ||
| 1163 |
2/2✓ Branch 0 taken 12432050 times.
✓ Branch 1 taken 4194058 times.
|
33252216 | for (int jamoind = 0; jamoind < jamo_cnt; jamoind++) { |
| 1164 | 24864100 | uint16 *implicit_weight = implicit + jamoind * MY_UCA_900_CE_SIZE; | |
| 1165 | 24864100 | uint page = hangul_jamo[jamoind] >> 8; | |
| 1166 | 24864100 | uint code = hangul_jamo[jamoind] & 0xFF; | |
| 1167 | 24864100 | const uint16 *jamo_weight_page = uca->weights[page]; | |
| 1168 | 24864100 | implicit_weight[0] = UCA900_WEIGHT(jamo_weight_page, 0, code); | |
| 1169 | 24864100 | implicit_weight[1] = UCA900_WEIGHT(jamo_weight_page, 1, code); | |
| 1170 | 24864100 | implicit_weight[2] = UCA900_WEIGHT(jamo_weight_page, 2, code); | |
| 1171 | } | ||
| 1172 | 8388116 | implicit[9] = jamo_cnt; | |
| 1173 | } | ||
| 1174 | |||
| 1175 | /* | ||
| 1176 | Chinese Han characters are assigned an implicit weight according to the | ||
| 1177 | Unicode Collation Algorithm. But when creating our Chinese collation for | ||
| 1178 | utf8mb4, to implement this language's reorder rule, we give the Han | ||
| 1179 | characters in CLDR zh.xml file weight values from 0x1C47 to 0xBDBE, and let | ||
| 1180 | the other Han characters keep their implicit weight. Per UCA, the smallest | ||
| 1181 | leading primary weight of the implicit weight is 0xFB00, and the largest | ||
| 1182 | primary weight we occupy for the Han characters in zh.xml is 0xBDBE. There is | ||
| 1183 | a huge gap between these two weight values. To use this weight value gap and | ||
| 1184 | let the character groups like Latin, Cyrillic, have a single primary weight as | ||
| 1185 | before reordering, we change the leading primary weight of the implicit weight | ||
| 1186 | as below. | ||
| 1187 | */ | ||
| 1188 | 10147731 | static uint16 change_zh_implicit(uint16 weight) { | |
| 1189 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10147731 times.
|
10147731 | assert(weight >= 0xFB00); |
| 1190 |
6/7✓ Branch 0 taken 62409 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1884 times.
✓ Branch 3 taken 471 times.
✓ Branch 4 taken 10519 times.
✓ Branch 5 taken 51845 times.
✓ Branch 6 taken 10020603 times.
|
10147731 | switch (weight) { |
| 1191 | 62409 | case 0xFB00: | |
| 1192 | 62409 | return 0xF621; | |
| 1193 | ✗ | case 0xFB40: | |
| 1194 | ✗ | return 0xBDBF; | |
| 1195 | 1884 | case 0xFB41: | |
| 1196 | 1884 | return 0xBDC0; | |
| 1197 | 471 | case 0xFB80: | |
| 1198 | 471 | return 0xBDC1; | |
| 1199 | 10519 | case 0xFB84: | |
| 1200 | 10519 | return 0xBDC2; | |
| 1201 | 51845 | case 0xFB85: | |
| 1202 | 51845 | return 0xBDC3; | |
| 1203 | 10020603 | default: | |
| 1204 | 10020603 | return weight + 0xF622 - 0xFBC0; | |
| 1205 | } | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1209 | ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next_implicit( | ||
| 1210 | my_wc_t ch) { | ||
| 1211 | my_wc_t hangul_jamo[HANGUL_JAMO_MAX_LENGTH]; | ||
| 1212 | int jamo_cnt; | ||
| 1213 |
21/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 99072 times.
✓ Branch 17 taken 9377766 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 2972160 times.
✓ Branch 21 taken 286125393 times.
✓ Branch 22 taken 99072 times.
✓ Branch 23 taken 9566649 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 66054 times.
✓ Branch 27 taken 6377946 times.
✓ Branch 28 taken 924672 times.
✓ Branch 29 taken 89284608 times.
✓ Branch 30 taken 33024 times.
✓ Branch 31 taken 3195748 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 26 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 3 times.
✓ Branch 95 taken 139 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 119 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✓ Branch 101 taken 138 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 117 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 1 times.
✓ Branch 107 taken 8034 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 10829 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
|
408141570 | if ((jamo_cnt = my_decompose_hangul_syllable(ch, hangul_jamo))) { |
| 1214 | 4194058 | my_put_jamo_weights(hangul_jamo, jamo_cnt); | |
| 1215 | 4194058 | num_of_ce_left = jamo_cnt - 1; | |
| 1216 | 4194058 | wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv; | |
| 1217 | 4194058 | wbeg_stride = MY_UCA_900_CE_SIZE; | |
| 1218 | 4194058 | return *(implicit + weight_lv); | |
| 1219 | } | ||
| 1220 | |||
| 1221 | /* | ||
| 1222 | We give the Chinese collation different leading primary weight to make | ||
| 1223 | sure there are enough single weight values to be assigned to character | ||
| 1224 | groups like Latin, Cyrillic, etc. | ||
| 1225 | */ | ||
| 1226 | uint page; | ||
| 1227 |
41/224✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 9105408 times.
✓ Branch 33 taken 272358 times.
✓ Branch 34 taken 62208 times.
✓ Branch 35 taken 9043200 times.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 272736543 times.
✓ Branch 41 taken 13388850 times.
✓ Branch 42 taken 1866441 times.
✓ Branch 43 taken 270870102 times.
✓ Branch 44 taken 9105489 times.
✓ Branch 45 taken 461160 times.
✓ Branch 46 taken 62208 times.
✓ Branch 47 taken 9043281 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 6070326 times.
✓ Branch 53 taken 307620 times.
✓ Branch 54 taken 41472 times.
✓ Branch 55 taken 6028854 times.
✓ Branch 56 taken 84983808 times.
✓ Branch 57 taken 4300800 times.
✓ Branch 58 taken 580608 times.
✓ Branch 59 taken 84403200 times.
✓ Branch 60 taken 3041515 times.
✓ Branch 61 taken 154233 times.
✓ Branch 62 taken 20736 times.
✓ Branch 63 taken 3020779 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 2 times.
✓ Branch 125 taken 24 times.
✗ Branch 126 not taken.
✓ Branch 127 taken 2 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✓ Branch 188 taken 139 times.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✓ Branch 191 taken 139 times.
✓ Branch 192 taken 119 times.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✓ Branch 195 taken 119 times.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✓ Branch 200 taken 138 times.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✓ Branch 203 taken 138 times.
✓ Branch 204 taken 117 times.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✓ Branch 207 taken 117 times.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✓ Branch 212 taken 4328 times.
✓ Branch 213 taken 3706 times.
✗ Branch 214 not taken.
✓ Branch 215 taken 4328 times.
✓ Branch 216 taken 7191 times.
✓ Branch 217 taken 3638 times.
✗ Branch 218 not taken.
✓ Branch 219 taken 7191 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
|
403947512 | if (ch >= 0x17000 && ch <= 0x18AFF) // Tangut character |
| 1228 | { | ||
| 1229 | 2633673 | page = 0xFB00; | |
| 1230 | 2633673 | implicit[3] = (ch - 0x17000) | 0x8000; | |
| 1231 | } else { | ||
| 1232 | 401313839 | page = ch >> 15; | |
| 1233 | 401313839 | implicit[3] = (ch & 0x7FFF) | 0x8000; | |
| 1234 |
106/560✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 9315558 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 9257472 times.
✓ Branch 83 taken 58086 times.
✓ Branch 84 taken 8835840 times.
✓ Branch 85 taken 421632 times.
✓ Branch 86 taken 8451441 times.
✓ Branch 87 taken 384399 times.
✓ Branch 88 taken 8451072 times.
✓ Branch 89 taken 422001 times.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✓ Branch 100 taken 284258952 times.
✗ Branch 101 not taken.
✓ Branch 102 taken 282588246 times.
✓ Branch 103 taken 1670706 times.
✓ Branch 104 taken 264649302 times.
✓ Branch 105 taken 17938944 times.
✓ Branch 106 taken 253501731 times.
✓ Branch 107 taken 11147571 times.
✓ Branch 108 taken 253491030 times.
✓ Branch 109 taken 17949645 times.
✓ Branch 110 taken 9504441 times.
✗ Branch 111 not taken.
✓ Branch 112 taken 9446841 times.
✓ Branch 113 taken 57600 times.
✓ Branch 114 taken 8835897 times.
✓ Branch 115 taken 610944 times.
✓ Branch 116 taken 8451498 times.
✓ Branch 117 taken 384399 times.
✓ Branch 118 taken 8451129 times.
✓ Branch 119 taken 611313 times.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✓ Branch 130 taken 6336474 times.
✗ Branch 131 not taken.
✓ Branch 132 taken 6298074 times.
✓ Branch 133 taken 38400 times.
✓ Branch 134 taken 5890598 times.
✓ Branch 135 taken 407476 times.
✓ Branch 136 taken 5634332 times.
✓ Branch 137 taken 256266 times.
✓ Branch 138 taken 5634086 times.
✓ Branch 139 taken 407722 times.
✓ Branch 140 taken 88704000 times.
✗ Branch 141 not taken.
✓ Branch 142 taken 88166400 times.
✓ Branch 143 taken 537600 times.
✓ Branch 144 taken 82467840 times.
✓ Branch 145 taken 5698560 times.
✓ Branch 146 taken 78880116 times.
✓ Branch 147 taken 3587724 times.
✓ Branch 148 taken 78876672 times.
✓ Branch 149 taken 5702004 times.
✓ Branch 150 taken 3175012 times.
✗ Branch 151 not taken.
✓ Branch 152 taken 3155810 times.
✓ Branch 153 taken 19202 times.
✓ Branch 154 taken 2951651 times.
✓ Branch 155 taken 204159 times.
✓ Branch 156 taken 2823515 times.
✓ Branch 157 taken 128136 times.
✓ Branch 158 taken 2823392 times.
✓ Branch 159 taken 204282 times.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✗ Branch 251 not taken.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✓ Branch 310 taken 26 times.
✗ Branch 311 not taken.
✓ Branch 312 taken 26 times.
✗ Branch 313 not taken.
✓ Branch 314 taken 2 times.
✓ Branch 315 taken 24 times.
✗ Branch 316 not taken.
✓ Branch 317 taken 2 times.
✗ Branch 318 not taken.
✓ Branch 319 taken 24 times.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✗ Branch 376 not taken.
✗ Branch 377 not taken.
✗ Branch 378 not taken.
✗ Branch 379 not taken.
✗ Branch 380 not taken.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✗ Branch 383 not taken.
✗ Branch 384 not taken.
✗ Branch 385 not taken.
✗ Branch 386 not taken.
✗ Branch 387 not taken.
✗ Branch 388 not taken.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✗ Branch 391 not taken.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✗ Branch 400 not taken.
✗ Branch 401 not taken.
✗ Branch 402 not taken.
✗ Branch 403 not taken.
✗ Branch 404 not taken.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✗ Branch 407 not taken.
✗ Branch 408 not taken.
✗ Branch 409 not taken.
✗ Branch 410 not taken.
✗ Branch 411 not taken.
✗ Branch 412 not taken.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✗ Branch 415 not taken.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✗ Branch 424 not taken.
✗ Branch 425 not taken.
✗ Branch 426 not taken.
✗ Branch 427 not taken.
✗ Branch 428 not taken.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✗ Branch 431 not taken.
✗ Branch 432 not taken.
✗ Branch 433 not taken.
✗ Branch 434 not taken.
✗ Branch 435 not taken.
✗ Branch 436 not taken.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✗ Branch 439 not taken.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
✗ Branch 448 not taken.
✗ Branch 449 not taken.
✗ Branch 450 not taken.
✗ Branch 451 not taken.
✗ Branch 452 not taken.
✗ Branch 453 not taken.
✗ Branch 454 not taken.
✗ Branch 455 not taken.
✗ Branch 456 not taken.
✗ Branch 457 not taken.
✗ Branch 458 not taken.
✗ Branch 459 not taken.
✗ Branch 460 not taken.
✗ Branch 461 not taken.
✗ Branch 462 not taken.
✗ Branch 463 not taken.
✗ Branch 464 not taken.
✗ Branch 465 not taken.
✗ Branch 466 not taken.
✗ Branch 467 not taken.
✗ Branch 468 not taken.
✗ Branch 469 not taken.
✓ Branch 470 taken 139 times.
✗ Branch 471 not taken.
✓ Branch 472 taken 139 times.
✗ Branch 473 not taken.
✓ Branch 474 taken 100 times.
✓ Branch 475 taken 39 times.
✓ Branch 476 taken 100 times.
✗ Branch 477 not taken.
✓ Branch 478 taken 100 times.
✓ Branch 479 taken 39 times.
✓ Branch 480 taken 119 times.
✗ Branch 481 not taken.
✓ Branch 482 taken 119 times.
✗ Branch 483 not taken.
✓ Branch 484 taken 94 times.
✓ Branch 485 taken 25 times.
✓ Branch 486 taken 94 times.
✗ Branch 487 not taken.
✓ Branch 488 taken 94 times.
✓ Branch 489 taken 25 times.
✗ Branch 490 not taken.
✗ Branch 491 not taken.
✗ Branch 492 not taken.
✗ Branch 493 not taken.
✗ Branch 494 not taken.
✗ Branch 495 not taken.
✗ Branch 496 not taken.
✗ Branch 497 not taken.
✗ Branch 498 not taken.
✗ Branch 499 not taken.
✓ Branch 500 taken 138 times.
✗ Branch 501 not taken.
✓ Branch 502 taken 138 times.
✗ Branch 503 not taken.
✓ Branch 504 taken 99 times.
✓ Branch 505 taken 39 times.
✓ Branch 506 taken 99 times.
✗ Branch 507 not taken.
✓ Branch 508 taken 99 times.
✓ Branch 509 taken 39 times.
✓ Branch 510 taken 117 times.
✗ Branch 511 not taken.
✓ Branch 512 taken 117 times.
✗ Branch 513 not taken.
✓ Branch 514 taken 93 times.
✓ Branch 515 taken 24 times.
✓ Branch 516 taken 93 times.
✗ Branch 517 not taken.
✓ Branch 518 taken 93 times.
✓ Branch 519 taken 24 times.
✗ Branch 520 not taken.
✗ Branch 521 not taken.
✗ Branch 522 not taken.
✗ Branch 523 not taken.
✗ Branch 524 not taken.
✗ Branch 525 not taken.
✗ Branch 526 not taken.
✗ Branch 527 not taken.
✗ Branch 528 not taken.
✗ Branch 529 not taken.
✓ Branch 530 taken 8034 times.
✗ Branch 531 not taken.
✓ Branch 532 taken 7785 times.
✓ Branch 533 taken 249 times.
✓ Branch 534 taken 4289 times.
✓ Branch 535 taken 3496 times.
✓ Branch 536 taken 3637 times.
✓ Branch 537 taken 652 times.
✓ Branch 538 taken 3637 times.
✓ Branch 539 taken 3496 times.
✓ Branch 540 taken 10829 times.
✗ Branch 541 not taken.
✓ Branch 542 taken 10631 times.
✓ Branch 543 taken 198 times.
✓ Branch 544 taken 7167 times.
✓ Branch 545 taken 3464 times.
✓ Branch 546 taken 6541 times.
✓ Branch 547 taken 626 times.
✓ Branch 548 taken 6541 times.
✓ Branch 549 taken 3464 times.
✗ Branch 550 not taken.
✗ Branch 551 not taken.
✗ Branch 552 not taken.
✗ Branch 553 not taken.
✗ Branch 554 not taken.
✗ Branch 555 not taken.
✗ Branch 556 not taken.
✗ Branch 557 not taken.
✗ Branch 558 not taken.
✗ Branch 559 not taken.
|
401313839 | if ((ch >= 0x3400 && ch <= 0x4DB5) || (ch >= 0x20000 && ch <= 0x2A6D6) || |
| 1235 |
92/448✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✓ Branch 64 taken 8413731 times.
✓ Branch 65 taken 37341 times.
✓ Branch 66 taken 8413632 times.
✓ Branch 67 taken 422100 times.
✓ Branch 68 taken 8411634 times.
✓ Branch 69 taken 1998 times.
✓ Branch 70 taken 8411616 times.
✓ Branch 71 taken 422118 times.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 252398448 times.
✓ Branch 81 taken 1092582 times.
✓ Branch 82 taken 252395478 times.
✓ Branch 83 taken 17952615 times.
✓ Branch 84 taken 252335808 times.
✓ Branch 85 taken 59670 times.
✓ Branch 86 taken 252335286 times.
✓ Branch 87 taken 17953137 times.
✓ Branch 88 taken 8413788 times.
✓ Branch 89 taken 37341 times.
✓ Branch 90 taken 8413689 times.
✓ Branch 91 taken 611412 times.
✓ Branch 92 taken 8411667 times.
✓ Branch 93 taken 2022 times.
✓ Branch 94 taken 8411649 times.
✓ Branch 95 taken 611430 times.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 5609192 times.
✓ Branch 105 taken 24894 times.
✓ Branch 106 taken 5609126 times.
✓ Branch 107 taken 407788 times.
✓ Branch 108 taken 5607778 times.
✓ Branch 109 taken 1348 times.
✓ Branch 110 taken 5607766 times.
✓ Branch 111 taken 407800 times.
✓ Branch 112 taken 78528156 times.
✓ Branch 113 taken 348516 times.
✓ Branch 114 taken 78527232 times.
✓ Branch 115 taken 5702928 times.
✓ Branch 116 taken 78508584 times.
✓ Branch 117 taken 18648 times.
✓ Branch 118 taken 78508416 times.
✓ Branch 119 taken 5703096 times.
✓ Branch 120 taken 2810945 times.
✓ Branch 121 taken 12447 times.
✓ Branch 122 taken 2810912 times.
✓ Branch 123 taken 204315 times.
✓ Branch 124 taken 2810238 times.
✓ Branch 125 taken 674 times.
✓ Branch 126 taken 2810232 times.
✓ Branch 127 taken 204321 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✗ Branch 248 not taken.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✓ Branch 251 taken 24 times.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✓ Branch 255 taken 24 times.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✓ Branch 376 taken 100 times.
✗ Branch 377 not taken.
✓ Branch 378 taken 100 times.
✓ Branch 379 taken 39 times.
✓ Branch 380 taken 56 times.
✓ Branch 381 taken 44 times.
✓ Branch 382 taken 56 times.
✓ Branch 383 taken 39 times.
✓ Branch 384 taken 94 times.
✗ Branch 385 not taken.
✓ Branch 386 taken 94 times.
✓ Branch 387 taken 25 times.
✓ Branch 388 taken 61 times.
✓ Branch 389 taken 33 times.
✓ Branch 390 taken 61 times.
✓ Branch 391 taken 25 times.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✓ Branch 400 taken 99 times.
✗ Branch 401 not taken.
✓ Branch 402 taken 99 times.
✓ Branch 403 taken 39 times.
✓ Branch 404 taken 55 times.
✓ Branch 405 taken 44 times.
✓ Branch 406 taken 55 times.
✓ Branch 407 taken 39 times.
✓ Branch 408 taken 93 times.
✗ Branch 409 not taken.
✓ Branch 410 taken 93 times.
✓ Branch 411 taken 24 times.
✓ Branch 412 taken 60 times.
✓ Branch 413 taken 33 times.
✓ Branch 414 taken 60 times.
✓ Branch 415 taken 24 times.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✓ Branch 424 taken 3637 times.
✗ Branch 425 not taken.
✓ Branch 426 taken 3637 times.
✓ Branch 427 taken 3496 times.
✓ Branch 428 taken 3593 times.
✓ Branch 429 taken 44 times.
✓ Branch 430 taken 3593 times.
✓ Branch 431 taken 3496 times.
✓ Branch 432 taken 6541 times.
✗ Branch 433 not taken.
✓ Branch 434 taken 6541 times.
✓ Branch 435 taken 3464 times.
✓ Branch 436 taken 6508 times.
✓ Branch 437 taken 33 times.
✓ Branch 438 taken 6508 times.
✓ Branch 439 taken 3464 times.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
|
383042023 | (ch >= 0x2A700 && ch <= 0x2B734) || (ch >= 0x2B740 && ch <= 0x2B81D) || |
| 1236 |
24/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 51858 times.
✓ Branch 17 taken 8359758 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 1543050 times.
✓ Branch 21 taken 250792236 times.
✓ Branch 22 taken 51882 times.
✓ Branch 23 taken 8359767 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 34588 times.
✓ Branch 27 taken 5573178 times.
✓ Branch 28 taken 484008 times.
✓ Branch 29 taken 78024408 times.
✓ Branch 30 taken 17294 times.
✓ Branch 31 taken 2792938 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 31 times.
✓ Branch 95 taken 25 times.
✓ Branch 96 taken 44 times.
✓ Branch 97 taken 17 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✓ Branch 100 taken 31 times.
✓ Branch 101 taken 24 times.
✓ Branch 102 taken 44 times.
✓ Branch 103 taken 16 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 31 times.
✓ Branch 107 taken 3562 times.
✓ Branch 108 taken 44 times.
✓ Branch 109 taken 6464 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
|
356095298 | (ch >= 0x2B820 && ch <= 0x2CEA1)) { |
| 1237 | 22092433 | page += 0xFB80; | |
| 1238 |
63/448✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✓ Branch 64 taken 8781876 times.
✗ Branch 65 not taken.
✓ Branch 66 taken 8781876 times.
✗ Branch 67 not taken.
✓ Branch 68 taken 8724276 times.
✓ Branch 69 taken 57600 times.
✗ Branch 70 not taken.
✓ Branch 71 taken 8724276 times.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✓ Branch 80 taken 268745373 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 263465973 times.
✓ Branch 83 taken 5279400 times.
✓ Branch 84 taken 261727389 times.
✓ Branch 85 taken 1738584 times.
✗ Branch 86 not taken.
✓ Branch 87 taken 261727389 times.
✓ Branch 88 taken 8971197 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 8782287 times.
✓ Branch 91 taken 188910 times.
✓ Branch 92 taken 8724309 times.
✓ Branch 93 taken 57978 times.
✗ Branch 94 not taken.
✓ Branch 95 taken 8724309 times.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 5980978 times.
✗ Branch 105 not taken.
✓ Branch 106 taken 5854858 times.
✓ Branch 107 taken 126120 times.
✓ Branch 108 taken 5816206 times.
✓ Branch 109 taken 38652 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 5816206 times.
✓ Branch 112 taken 83727504 times.
✗ Branch 113 not taken.
✓ Branch 114 taken 81967704 times.
✓ Branch 115 taken 1759800 times.
✓ Branch 116 taken 81426576 times.
✓ Branch 117 taken 541128 times.
✗ Branch 118 not taken.
✓ Branch 119 taken 81426576 times.
✓ Branch 120 taken 2997259 times.
✗ Branch 121 not taken.
✓ Branch 122 taken 2933778 times.
✓ Branch 123 taken 63481 times.
✓ Branch 124 taken 2914452 times.
✓ Branch 125 taken 19326 times.
✗ Branch 126 not taken.
✓ Branch 127 taken 2914452 times.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
✗ Branch 224 not taken.
✗ Branch 225 not taken.
✗ Branch 226 not taken.
✗ Branch 227 not taken.
✗ Branch 228 not taken.
✗ Branch 229 not taken.
✗ Branch 230 not taken.
✗ Branch 231 not taken.
✗ Branch 232 not taken.
✗ Branch 233 not taken.
✗ Branch 234 not taken.
✗ Branch 235 not taken.
✗ Branch 236 not taken.
✗ Branch 237 not taken.
✗ Branch 238 not taken.
✗ Branch 239 not taken.
✗ Branch 240 not taken.
✗ Branch 241 not taken.
✗ Branch 242 not taken.
✗ Branch 243 not taken.
✗ Branch 244 not taken.
✗ Branch 245 not taken.
✗ Branch 246 not taken.
✗ Branch 247 not taken.
✓ Branch 248 taken 24 times.
✗ Branch 249 not taken.
✗ Branch 250 not taken.
✓ Branch 251 taken 24 times.
✗ Branch 252 not taken.
✗ Branch 253 not taken.
✗ Branch 254 not taken.
✗ Branch 255 not taken.
✗ Branch 256 not taken.
✗ Branch 257 not taken.
✗ Branch 258 not taken.
✗ Branch 259 not taken.
✗ Branch 260 not taken.
✗ Branch 261 not taken.
✗ Branch 262 not taken.
✗ Branch 263 not taken.
✗ Branch 264 not taken.
✗ Branch 265 not taken.
✗ Branch 266 not taken.
✗ Branch 267 not taken.
✗ Branch 268 not taken.
✗ Branch 269 not taken.
✗ Branch 270 not taken.
✗ Branch 271 not taken.
✗ Branch 272 not taken.
✗ Branch 273 not taken.
✗ Branch 274 not taken.
✗ Branch 275 not taken.
✗ Branch 276 not taken.
✗ Branch 277 not taken.
✗ Branch 278 not taken.
✗ Branch 279 not taken.
✗ Branch 280 not taken.
✗ Branch 281 not taken.
✗ Branch 282 not taken.
✗ Branch 283 not taken.
✗ Branch 284 not taken.
✗ Branch 285 not taken.
✗ Branch 286 not taken.
✗ Branch 287 not taken.
✗ Branch 288 not taken.
✗ Branch 289 not taken.
✗ Branch 290 not taken.
✗ Branch 291 not taken.
✗ Branch 292 not taken.
✗ Branch 293 not taken.
✗ Branch 294 not taken.
✗ Branch 295 not taken.
✗ Branch 296 not taken.
✗ Branch 297 not taken.
✗ Branch 298 not taken.
✗ Branch 299 not taken.
✗ Branch 300 not taken.
✗ Branch 301 not taken.
✗ Branch 302 not taken.
✗ Branch 303 not taken.
✗ Branch 304 not taken.
✗ Branch 305 not taken.
✗ Branch 306 not taken.
✗ Branch 307 not taken.
✗ Branch 308 not taken.
✗ Branch 309 not taken.
✗ Branch 310 not taken.
✗ Branch 311 not taken.
✗ Branch 312 not taken.
✗ Branch 313 not taken.
✗ Branch 314 not taken.
✗ Branch 315 not taken.
✗ Branch 316 not taken.
✗ Branch 317 not taken.
✗ Branch 318 not taken.
✗ Branch 319 not taken.
✗ Branch 320 not taken.
✗ Branch 321 not taken.
✗ Branch 322 not taken.
✗ Branch 323 not taken.
✗ Branch 324 not taken.
✗ Branch 325 not taken.
✗ Branch 326 not taken.
✗ Branch 327 not taken.
✗ Branch 328 not taken.
✗ Branch 329 not taken.
✗ Branch 330 not taken.
✗ Branch 331 not taken.
✗ Branch 332 not taken.
✗ Branch 333 not taken.
✗ Branch 334 not taken.
✗ Branch 335 not taken.
✗ Branch 336 not taken.
✗ Branch 337 not taken.
✗ Branch 338 not taken.
✗ Branch 339 not taken.
✗ Branch 340 not taken.
✗ Branch 341 not taken.
✗ Branch 342 not taken.
✗ Branch 343 not taken.
✗ Branch 344 not taken.
✗ Branch 345 not taken.
✗ Branch 346 not taken.
✗ Branch 347 not taken.
✗ Branch 348 not taken.
✗ Branch 349 not taken.
✗ Branch 350 not taken.
✗ Branch 351 not taken.
✗ Branch 352 not taken.
✗ Branch 353 not taken.
✗ Branch 354 not taken.
✗ Branch 355 not taken.
✗ Branch 356 not taken.
✗ Branch 357 not taken.
✗ Branch 358 not taken.
✗ Branch 359 not taken.
✗ Branch 360 not taken.
✗ Branch 361 not taken.
✗ Branch 362 not taken.
✗ Branch 363 not taken.
✗ Branch 364 not taken.
✗ Branch 365 not taken.
✗ Branch 366 not taken.
✗ Branch 367 not taken.
✗ Branch 368 not taken.
✗ Branch 369 not taken.
✗ Branch 370 not taken.
✗ Branch 371 not taken.
✗ Branch 372 not taken.
✗ Branch 373 not taken.
✗ Branch 374 not taken.
✗ Branch 375 not taken.
✓ Branch 376 taken 64 times.
✗ Branch 377 not taken.
✓ Branch 378 taken 64 times.
✗ Branch 379 not taken.
✓ Branch 380 taken 64 times.
✗ Branch 381 not taken.
✗ Branch 382 not taken.
✓ Branch 383 taken 64 times.
✓ Branch 384 taken 42 times.
✗ Branch 385 not taken.
✓ Branch 386 taken 42 times.
✗ Branch 387 not taken.
✓ Branch 388 taken 42 times.
✗ Branch 389 not taken.
✗ Branch 390 not taken.
✓ Branch 391 taken 42 times.
✗ Branch 392 not taken.
✗ Branch 393 not taken.
✗ Branch 394 not taken.
✗ Branch 395 not taken.
✗ Branch 396 not taken.
✗ Branch 397 not taken.
✗ Branch 398 not taken.
✗ Branch 399 not taken.
✓ Branch 400 taken 63 times.
✗ Branch 401 not taken.
✓ Branch 402 taken 63 times.
✗ Branch 403 not taken.
✓ Branch 404 taken 63 times.
✗ Branch 405 not taken.
✗ Branch 406 not taken.
✓ Branch 407 taken 63 times.
✓ Branch 408 taken 40 times.
✗ Branch 409 not taken.
✓ Branch 410 taken 40 times.
✗ Branch 411 not taken.
✓ Branch 412 taken 40 times.
✗ Branch 413 not taken.
✗ Branch 414 not taken.
✓ Branch 415 taken 40 times.
✗ Branch 416 not taken.
✗ Branch 417 not taken.
✗ Branch 418 not taken.
✗ Branch 419 not taken.
✗ Branch 420 not taken.
✗ Branch 421 not taken.
✗ Branch 422 not taken.
✗ Branch 423 not taken.
✓ Branch 424 taken 7058 times.
✗ Branch 425 not taken.
✓ Branch 426 taken 3601 times.
✓ Branch 427 taken 3457 times.
✓ Branch 428 taken 3601 times.
✗ Branch 429 not taken.
✗ Branch 430 not taken.
✓ Branch 431 taken 3601 times.
✓ Branch 432 taken 9928 times.
✗ Branch 433 not taken.
✓ Branch 434 taken 6488 times.
✓ Branch 435 taken 3440 times.
✓ Branch 436 taken 6488 times.
✗ Branch 437 not taken.
✗ Branch 438 not taken.
✓ Branch 439 taken 6488 times.
✗ Branch 440 not taken.
✗ Branch 441 not taken.
✗ Branch 442 not taken.
✗ Branch 443 not taken.
✗ Branch 444 not taken.
✗ Branch 445 not taken.
✗ Branch 446 not taken.
✗ Branch 447 not taken.
|
379221406 | } else if ((ch >= 0x4E00 && ch <= 0x9FD5) || (ch >= 0xFA0E && ch <= 0xFA29)) |
| 1239 | 7424632 | page += 0xFB40; | |
| 1240 | else | ||
| 1241 | 371796774 | page += 0xFBC0; | |
| 1242 | } | ||
| 1243 |
14/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 9377766 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 8893983 times.
✓ Branch 21 taken 277231410 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 9566649 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 6377946 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 89284608 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 3195748 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✓ Branch 63 taken 26 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 139 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 119 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✓ Branch 101 taken 138 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 117 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✓ Branch 107 taken 8034 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 10829 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
|
403947512 | if (cs->coll_param == &zh_coll_param) { |
| 1244 | 8893983 | page = change_zh_implicit(page); | |
| 1245 | } | ||
| 1246 | 403947512 | implicit[0] = page; | |
| 1247 | 403947512 | implicit[1] = 0x0020; | |
| 1248 | 403947512 | implicit[2] = 0x0002; | |
| 1249 | // implicit[3] is set above. | ||
| 1250 | 403947512 | implicit[4] = 0; | |
| 1251 | 403947512 | implicit[5] = 0; | |
| 1252 | 403947512 | num_of_ce_left = 1; | |
| 1253 | 403947512 | wbeg = implicit + MY_UCA_900_CE_SIZE + weight_lv; | |
| 1254 | 403947512 | wbeg_stride = MY_UCA_900_CE_SIZE; | |
| 1255 | |||
| 1256 | 403947512 | return *(implicit + weight_lv); | |
| 1257 | } | ||
| 1258 | |||
| 1259 | /** | ||
| 1260 | Return implicit UCA weight | ||
| 1261 | Used for code points that do not have assigned UCA weights. | ||
| 1262 | |||
| 1263 | @return The leading implicit weight. | ||
| 1264 | */ | ||
| 1265 | |||
| 1266 | template <class Mb_wc> | ||
| 1267 | ALWAYS_INLINE int uca_scanner_any<Mb_wc>::next_implicit(my_wc_t ch) { | ||
| 1268 | 30495286 | implicit[0] = (ch & 0x7FFF) | 0x8000; | |
| 1269 | 30495286 | implicit[1] = 0; | |
| 1270 | 30495286 | wbeg = implicit; | |
| 1271 | 30495286 | wbeg_stride = MY_UCA_900_CE_SIZE; | |
| 1272 | |||
| 1273 | 30495286 | uint page = ch >> 15; | |
| 1274 | |||
| 1275 |
10/36✓ Branch 0 taken 1862400 times.
✓ Branch 1 taken 21439551 times.
✓ Branch 2 taken 460800 times.
✓ Branch 3 taken 6022659 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 372 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 357 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 12800 times.
✓ Branch 17 taken 76815 times.
✓ Branch 18 taken 12800 times.
✓ Branch 19 taken 76812 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
29965366 | if (ch >= 0x3400 && ch <= 0x4DB5) |
| 1276 | 2348800 | page += 0xFB80; | |
| 1277 |
18/72✓ Branch 0 taken 21439551 times.
✓ Branch 1 taken 423936 times.
✓ Branch 2 taken 6137832 times.
✓ Branch 3 taken 15301719 times.
✓ Branch 4 taken 6022659 times.
✓ Branch 5 taken 105984 times.
✓ Branch 6 taken 1504944 times.
✓ Branch 7 taken 4517715 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 372 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 372 times.
✓ Branch 20 taken 357 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 357 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 76815 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 41804 times.
✓ Branch 35 taken 35011 times.
✓ Branch 36 taken 76812 times.
✗ Branch 37 not taken.
✓ Branch 38 taken 41804 times.
✓ Branch 39 taken 35008 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
|
28146486 | else if (ch >= 0x4E00 && ch <= 0x9FA5) |
| 1278 | 7726384 | page += 0xFB40; | |
| 1279 | else | ||
| 1280 | 20420102 | page += 0xFBC0; | |
| 1281 | |||
| 1282 | 30495286 | return page; | |
| 1283 | } | ||
| 1284 | |||
| 1285 | template <class Mb_wc> | ||
| 1286 | ALWAYS_INLINE int uca_scanner_any<Mb_wc>::next() { | ||
| 1287 | /* | ||
| 1288 | Check if the weights for the previous code point have been | ||
| 1289 | already fully scanned. If yes, then get the next code point and | ||
| 1290 | initialize wbeg and wlength to its weight string. | ||
| 1291 | */ | ||
| 1292 | |||
| 1293 |
16/28✓ Branch 0 taken 25421620 times.
✓ Branch 1 taken 347255757 times.
✓ Branch 2 taken 7002842 times.
✓ Branch 3 taken 160781076 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 102026 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26946 times.
✓ Branch 8 taken 4312 times.
✓ Branch 9 taken 7436495 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 368 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 1428 times.
✓ Branch 14 taken 101974 times.
✓ Branch 15 taken 290308 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 8 times.
✗ Branch 18 not taken.
✓ Branch 19 taken 20 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 98 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 84 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
548425362 | if (wbeg[0]) /* More weights left from the previous step: */ |
| 1294 | 32635856 | return *wbeg++; /* return the next weight from expansion */ | |
| 1295 | |||
| 1296 | do { | ||
| 1297 | 523830696 | my_wc_t wc = 0; | |
| 1298 | |||
| 1299 | /* Get next code point */ | ||
| 1300 |
23/36✓ Branch 0 taken 347425992 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 80229898 times.
✓ Branch 3 taken 80586689 times.
✓ Branch 4 taken 102026 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 648 times.
✓ Branch 7 taken 26298 times.
✓ Branch 8 taken 7421784 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 7436921 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 370 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1428 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 141298 times.
✓ Branch 17 taken 150340 times.
✓ Branch 18 taken 136936 times.
✓ Branch 19 taken 155155 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 4 times.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 100 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 98 times.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✓ Branch 31 taken 54 times.
✓ Branch 32 taken 30 times.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
523830696 | int mblen = mb_wc(&wc, sbeg, send); |
| 1301 |
31/36✓ Branch 0 taken 173831771 times.
✓ Branch 1 taken 173594221 times.
✓ Branch 2 taken 80672266 times.
✓ Branch 3 taken 80144321 times.
✓ Branch 4 taken 2136 times.
✓ Branch 5 taken 99890 times.
✓ Branch 6 taken 648 times.
✓ Branch 7 taken 26298 times.
✓ Branch 8 taken 113777 times.
✓ Branch 9 taken 7308007 times.
✓ Branch 10 taken 113591 times.
✓ Branch 11 taken 7323330 times.
✓ Branch 12 taken 232 times.
✓ Branch 13 taken 138 times.
✓ Branch 14 taken 418 times.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 141298 times.
✓ Branch 17 taken 150340 times.
✓ Branch 18 taken 136936 times.
✓ Branch 19 taken 155155 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 4 times.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 19 times.
✓ Branch 25 taken 81 times.
✓ Branch 26 taken 19 times.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✓ Branch 31 taken 54 times.
✓ Branch 32 taken 30 times.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
523816178 | if (mblen <= 0) { |
| 1302 | 255013196 | ++weight_lv; | |
| 1303 |
19/36✓ Branch 0 taken 173389403 times.
✓ Branch 1 taken 442368 times.
✓ Branch 2 taken 80229898 times.
✓ Branch 3 taken 442368 times.
✓ Branch 4 taken 2136 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 648 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 113778 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 112565 times.
✓ Branch 11 taken 1026 times.
✓ Branch 12 taken 232 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 418 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 141298 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 136936 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 5 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 20 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 19 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 19 times.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 30 times.
✗ Branch 31 not taken.
✓ Branch 32 taken 30 times.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
503452146 | if (sbeg >= send) return -1; /* No more bytes, end of line reached */ |
| 1304 | /* | ||
| 1305 | There are some more bytes left. Non-positive mb_len means that | ||
| 1306 | we got an incomplete or a bad byte sequence. Consume mbminlen bytes. | ||
| 1307 | */ | ||
| 1308 |
3/36✗ Branch 0 not taken.
✓ Branch 1 taken 442368 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 442368 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1026 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
885761 | if ((sbeg += cs->mbminlen) > send) { |
| 1309 | /* For safety purposes don't go beyond the string range. */ | ||
| 1310 | ✗ | sbeg = send; | |
| 1311 | } | ||
| 1312 | /* | ||
| 1313 | Treat every complete or incomplete mbminlen unit as a weight which is | ||
| 1314 | greater than weight for any possible normal character. | ||
| 1315 | 0xFFFF is greater than any possible weight in the UCA weight table. | ||
| 1316 | */ | ||
| 1317 | 885761 | return 0xFFFF; | |
| 1318 | } | ||
| 1319 | |||
| 1320 | 268802982 | sbeg += mblen; | |
| 1321 | 268802982 | char_index++; | |
| 1322 |
18/36✓ Branch 0 taken 144703492 times.
✓ Branch 1 taken 28890729 times.
✓ Branch 2 taken 72351751 times.
✓ Branch 3 taken 7792570 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 99890 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26298 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 7308007 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7323330 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 138 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 150340 times.
✓ Branch 18 taken 1 times.
✓ Branch 19 taken 155154 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 4 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 81 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
268802982 | if (wc > uca->maxchar) { |
| 1323 | /* Return 0xFFFD as weight for all characters outside BMP */ | ||
| 1324 | 217055244 | wbeg = nochar; | |
| 1325 | 217055244 | wbeg_stride = 0; | |
| 1326 | 217055244 | return 0xFFFD; | |
| 1327 | } | ||
| 1328 | |||
| 1329 |
24/36✓ Branch 0 taken 4798552 times.
✓ Branch 1 taken 24092177 times.
✓ Branch 2 taken 1161710 times.
✓ Branch 3 taken 6630860 times.
✓ Branch 4 taken 35488 times.
✓ Branch 5 taken 64402 times.
✓ Branch 6 taken 8913 times.
✓ Branch 7 taken 17385 times.
✓ Branch 8 taken 4441 times.
✓ Branch 9 taken 7309177 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7308254 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 138 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 4263 times.
✓ Branch 17 taken 146077 times.
✓ Branch 18 taken 4468 times.
✓ Branch 19 taken 150686 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 4 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2 times.
✓ Branch 25 taken 79 times.
✓ Branch 26 taken 2 times.
✓ Branch 27 taken 77 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
51747738 | if (my_uca_have_contractions(uca)) { |
| 1330 | const uint16 *cweight; | ||
| 1331 | /* | ||
| 1332 | If we have scanned a code point which can have previous context, | ||
| 1333 | and there were some more code point already before, | ||
| 1334 | then verify that {prev_char, wc} together form | ||
| 1335 | a real previous context pair. | ||
| 1336 | Note, we support only 2-character long sequences with previous | ||
| 1337 | context at the moment. CLDR does not have longer sequences. | ||
| 1338 | */ | ||
| 1339 | 6004265 | if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) && | |
| 1340 |
4/72✓ Branch 0 taken 56 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
|
120 | wbeg != nochar && /* if not the very first character */ |
| 1341 | 56 | my_uca_can_be_previous_context_head(uca->contraction_flags, | |
| 1342 |
22/72✓ Branch 0 taken 64 times.
✓ Branch 1 taken 4798488 times.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 4798512 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 1161710 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 1161710 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 35488 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 35488 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 8913 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 8913 times.
✗ Branch 16 not taken.
✓ Branch 17 taken 14030 times.
✗ Branch 18 not taken.
✓ Branch 19 taken 14030 times.
✗ Branch 20 not taken.
✓ Branch 21 taken 14836 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 14836 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✓ Branch 33 taken 4263 times.
✗ Branch 34 not taken.
✓ Branch 35 taken 4263 times.
✗ Branch 36 not taken.
✓ Branch 37 taken 4468 times.
✗ Branch 38 not taken.
✓ Branch 39 taken 4468 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2 times.
✗ Branch 50 not taken.
✓ Branch 51 taken 2 times.
✗ Branch 52 not taken.
✓ Branch 53 taken 2 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 2 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
|
6042328 | prev_char) && |
| 1343 |
2/72✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 40 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
|
80 | (cweight = previous_context_find(prev_char, wc))) { |
| 1344 | 40 | prev_char = 0; /* Clear for the next character */ | |
| 1345 | 40 | return *cweight; | |
| 1346 |
18/36✓ Branch 0 taken 5210 times.
✓ Branch 1 taken 4793302 times.
✓ Branch 2 taken 1050 times.
✓ Branch 3 taken 1160660 times.
✓ Branch 4 taken 1280 times.
✓ Branch 5 taken 34208 times.
✓ Branch 6 taken 320 times.
✓ Branch 7 taken 8593 times.
✓ Branch 8 taken 1425 times.
✓ Branch 9 taken 12605 times.
✓ Branch 10 taken 1159 times.
✓ Branch 11 taken 13677 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 540 times.
✓ Branch 17 taken 3723 times.
✓ Branch 18 taken 421 times.
✓ Branch 19 taken 4047 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 2 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 2 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
6042224 | } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) { |
| 1347 | /* Check if wc starts a contraction */ | ||
| 1348 | size_t chars_skipped; | ||
| 1349 |
24/72✓ Branch 0 taken 5210 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1007 times.
✓ Branch 3 taken 4203 times.
✓ Branch 4 taken 1050 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 36 times.
✓ Branch 7 taken 1014 times.
✓ Branch 8 taken 1280 times.
✗ Branch 9 not taken.
✓ Branch 10 taken 244 times.
✓ Branch 11 taken 1036 times.
✓ Branch 12 taken 320 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 61 times.
✓ Branch 15 taken 259 times.
✓ Branch 16 taken 1425 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 508 times.
✓ Branch 19 taken 917 times.
✓ Branch 20 taken 1159 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 583 times.
✓ Branch 23 taken 576 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 540 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 59 times.
✓ Branch 35 taken 481 times.
✓ Branch 36 taken 421 times.
✗ Branch 37 not taken.
✓ Branch 38 taken 121 times.
✓ Branch 39 taken 300 times.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
|
11405 | if ((cweight = contraction_find(wc, &chars_skipped))) { |
| 1350 | 2619 | char_index += chars_skipped; | |
| 1351 | 2619 | return *cweight; | |
| 1352 | } | ||
| 1353 | } | ||
| 1354 | 6039605 | prev_char = wc; | |
| 1355 | } | ||
| 1356 | |||
| 1357 | /* Process single code point */ | ||
| 1358 | 51760039 | uint page = wc >> 8; | |
| 1359 | 51760039 | uint code = wc & 0xFF; | |
| 1360 | |||
| 1361 | /* If weight page for wc does not exist, then calculate algorithmically */ | ||
| 1362 | 51760039 | const uint16 *wpage = uca->weights[page]; | |
| 1363 |
29/72✓ Branch 0 taken 23725887 times.
✓ Branch 1 taken 5163795 times.
✓ Branch 2 taken 23301951 times.
✓ Branch 3 taken 423936 times.
✓ Branch 4 taken 6589443 times.
✓ Branch 5 taken 1203091 times.
✓ Branch 6 taken 6483459 times.
✓ Branch 7 taken 105984 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 99646 times.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 26237 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 372 times.
✓ Branch 17 taken 7322327 times.
✓ Branch 18 taken 372 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 357 times.
✓ Branch 21 taken 7322150 times.
✓ Branch 22 taken 357 times.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 138 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✓ Branch 29 taken 1010 times.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 89615 times.
✓ Branch 33 taken 60666 times.
✓ Branch 34 taken 89615 times.
✗ Branch 35 not taken.
✓ Branch 36 taken 89612 times.
✓ Branch 37 taken 65421 times.
✓ Branch 38 taken 89612 times.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✓ Branch 41 taken 4 times.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 81 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 79 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✓ Branch 61 taken 54 times.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✓ Branch 65 taken 54 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
|
82255325 | if (!wpage) return next_implicit(wc); |
| 1364 | |||
| 1365 | /* Calculate pointer to wc's weight, using page and offset */ | ||
| 1366 | 21264753 | wbeg = wpage + code * uca->lengths[page]; | |
| 1367 | 21264753 | wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 1368 |
23/36✓ Branch 0 taken 170235 times.
✓ Branch 1 taken 4993560 times.
✓ Branch 2 taken 35511 times.
✓ Branch 3 taken 1167580 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 99646 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 26237 times.
✓ Branch 8 taken 362 times.
✓ Branch 9 taken 7321965 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 7323166 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 136 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1010 times.
✓ Branch 16 taken 1330 times.
✓ Branch 17 taken 59336 times.
✓ Branch 18 taken 1365 times.
✓ Branch 19 taken 64056 times.
✓ Branch 20 taken 1 times.
✓ Branch 21 taken 3 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2 times.
✓ Branch 25 taken 79 times.
✗ Branch 26 not taken.
✓ Branch 27 taken 79 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 54 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 54 times.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
|
21264753 | } while (!wbeg[0]); /* Skip ignorable code points */ |
| 1369 | |||
| 1370 | 21056961 | return *wbeg++; | |
| 1371 | } | ||
| 1372 | |||
| 1373 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1374 | 79843923616 | inline int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::more_weight() { | |
| 1375 | /* | ||
| 1376 | Check if the weights for the previous code point have been | ||
| 1377 | already fully scanned. If no, return the first non-zero | ||
| 1378 | weight. | ||
| 1379 | */ | ||
| 1380 | |||
| 1381 |
4/4✓ Branch 0 taken 418595692 times.
✓ Branch 1 taken 39712863105 times.
✓ Branch 2 taken 209496989 times.
✓ Branch 3 taken 209098703 times.
|
80262917594 | while (num_of_ce_left != 0 && *wbeg == 0) { |
| 1382 | 418993978 | wbeg += wbeg_stride; | |
| 1383 | 418993978 | --num_of_ce_left; | |
| 1384 | } | ||
| 1385 |
2/2✓ Branch 0 taken 209098703 times.
✓ Branch 1 taken 39712863105 times.
|
79843923616 | if (num_of_ce_left != 0) { |
| 1386 | 418197406 | uint16 rtn = *wbeg; | |
| 1387 | 418197406 | wbeg += wbeg_stride; | |
| 1388 | 418197406 | --num_of_ce_left; | |
| 1389 | 418197406 | return rtn; /* return the next weight from expansion */ | |
| 1390 | } | ||
| 1391 | 79425726210 | return -1; | |
| 1392 | } | ||
| 1393 | |||
| 1394 | 3341369 | static inline bool is_hiragana_char(my_wc_t wc) { | |
| 1395 |
4/4✓ Branch 0 taken 3301899 times.
✓ Branch 1 taken 39470 times.
✓ Branch 2 taken 2449 times.
✓ Branch 3 taken 3299450 times.
|
3341369 | return wc >= 0x3041 && wc <= 0x3096; |
| 1396 | } | ||
| 1397 | |||
| 1398 | 3343491 | static inline bool is_katakana_char(my_wc_t wc) { | |
| 1399 |
6/6✓ Branch 0 taken 3301534 times.
✓ Branch 1 taken 41957 times.
✓ Branch 2 taken 3299777 times.
✓ Branch 3 taken 1757 times.
✓ Branch 4 taken 3146358 times.
✓ Branch 5 taken 195376 times.
|
6489849 | return (wc >= 0x30A1 && wc <= 0x30FA) || // Full width katakana |
| 1400 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 3146022 times.
|
6489849 | (wc >= 0xFF66 && wc <= 0xFF9D); // Half width katakana |
| 1401 | } | ||
| 1402 | |||
| 1403 | 3341370 | static inline bool is_katakana_iteration(my_wc_t wc) { | |
| 1404 |
4/4✓ Branch 0 taken 3341362 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 3341356 times.
|
3341370 | return wc == 0x30FD || wc == 0x30FE; |
| 1405 | } | ||
| 1406 | |||
| 1407 | 3338920 | static inline bool is_hiragana_iteration(my_wc_t wc) { | |
| 1408 |
4/4✓ Branch 0 taken 3338912 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 3338906 times.
|
3338920 | return wc == 0x309D || wc == 0x309E; |
| 1409 | } | ||
| 1410 | |||
| 1411 | 3340713 | static inline bool is_ja_length_mark(my_wc_t wc) { return wc == 0x30FC; } | |
| 1412 | |||
| 1413 | /** | ||
| 1414 | Return quaternary weight when running for that level. | ||
| 1415 | |||
| 1416 | @retval 0 - Do not return quaternary weight. | ||
| 1417 | @retval others - Quaternary weight for this character. | ||
| 1418 | */ | ||
| 1419 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1420 | ALWAYS_INLINE int | ||
| 1421 | uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::handle_ja_contraction_quat_wt() { | ||
| 1422 | /* | ||
| 1423 | For Japanese, only weight shift rule and previous context rule is | ||
| 1424 | defined. And in previous context rules, the first character is always | ||
| 1425 | katakana / hiragana, and the second character is always iteration or | ||
| 1426 | length mark. The quaternary weight of iteration / length mark is | ||
| 1427 | same as the first character. So has_quaternary_weight is always true. | ||
| 1428 | For how we return quaternary weight, please refer to the comment in | ||
| 1429 | handle_ja_common_quat_wt(). | ||
| 1430 | */ | ||
| 1431 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 224 times.
✓ Branch 5 taken 1389 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 14 times.
✓ Branch 13 taken 42 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 12 times.
✓ Branch 23 taken 45 times.
✓ Branch 24 taken 12 times.
✓ Branch 25 taken 45 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
1783 | if (weight_lv == 3) { |
| 1432 | 262 | wbeg = nochar; | |
| 1433 | 262 | num_of_ce_left = 0; | |
| 1434 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 214 times.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 6 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 6 times.
✓ Branch 23 taken 6 times.
✓ Branch 24 taken 6 times.
✓ Branch 25 taken 6 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
262 | if (is_katakana_char(prev_char)) { |
| 1435 | 234 | return JA_KATA_QUAT_WEIGHT; | |
| 1436 |
4/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 6 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 6 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 6 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
28 | } else if (is_hiragana_char(prev_char)) { |
| 1437 | 28 | return JA_HIRA_QUAT_WEIGHT; | |
| 1438 | } | ||
| 1439 | } | ||
| 1440 | 1521 | return 0; | |
| 1441 | } | ||
| 1442 | |||
| 1443 | /** | ||
| 1444 | Check whether quaternary weight is needed for character with Japanese | ||
| 1445 | kana-sensitive collation. If it is, return quaternary weight when running | ||
| 1446 | for that level. | ||
| 1447 | |||
| 1448 | @retval 0 - Quaternary weight check is done. | ||
| 1449 | @retval -1 - There is no quaternary weight for this character. | ||
| 1450 | @retval others - Quaternary weight for this character. | ||
| 1451 | */ | ||
| 1452 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1453 | ALWAYS_INLINE int | ||
| 1454 | uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::handle_ja_common_quat_wt( | ||
| 1455 | my_wc_t wc) { | ||
| 1456 | /* | ||
| 1457 | For Japanese kana-sensitive collation, we detect whether quaternary | ||
| 1458 | weight is necessary when scanning for the first level of weight. | ||
| 1459 | If it is, the quaternary weight will be returned for katakana / | ||
| 1460 | hiragana later. | ||
| 1461 | */ | ||
| 1462 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3337783 times.
✓ Branch 5 taken 7338 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 740 times.
✓ Branch 13 taken 39 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 80 times.
✓ Branch 23 taken 54 times.
✓ Branch 24 taken 79 times.
✓ Branch 25 taken 54 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
3346167 | if (weight_lv == 0 && !has_quaternary_weight) { |
| 1463 |
14/56✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 3337288 times.
✓ Branch 9 taken 6 times.
✓ Branch 10 taken 3336717 times.
✓ Branch 11 taken 571 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 724 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 712 times.
✓ Branch 27 taken 12 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✓ Branch 44 taken 59 times.
✗ Branch 45 not taken.
✓ Branch 46 taken 31 times.
✓ Branch 47 taken 28 times.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 1 times.
✓ Branch 50 taken 31 times.
✓ Branch 51 taken 25 times.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
|
10014943 | if (is_katakana_char(wc) || is_katakana_iteration(wc) || |
| 1464 |
30/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3337294 times.
✓ Branch 17 taken 489 times.
✓ Branch 18 taken 3336711 times.
✓ Branch 19 taken 6 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 3336706 times.
✓ Branch 22 taken 1077 times.
✓ Branch 23 taken 3336706 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 724 times.
✓ Branch 49 taken 16 times.
✓ Branch 50 taken 712 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 708 times.
✓ Branch 54 taken 32 times.
✓ Branch 55 taken 708 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 59 times.
✓ Branch 89 taken 21 times.
✓ Branch 90 taken 30 times.
✓ Branch 91 taken 1 times.
✓ Branch 92 taken 3 times.
✓ Branch 93 taken 27 times.
✓ Branch 94 taken 53 times.
✓ Branch 95 taken 27 times.
✓ Branch 96 taken 57 times.
✓ Branch 97 taken 22 times.
✓ Branch 98 taken 31 times.
✗ Branch 99 not taken.
✓ Branch 100 taken 8 times.
✓ Branch 101 taken 23 times.
✓ Branch 102 taken 56 times.
✓ Branch 103 taken 23 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
|
13352427 | is_hiragana_char(wc) || is_hiragana_iteration(wc) || |
| 1465 | 3337484 | is_ja_length_mark(wc)) | |
| 1466 | 1218 | has_quaternary_weight = true; | |
| 1467 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 4284 times.
✓ Branch 5 taken 6691499 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 83 times.
✓ Branch 13 taken 1597 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 90 times.
✓ Branch 23 taken 279 times.
✓ Branch 24 taken 90 times.
✓ Branch 25 taken 279 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
6698201 | } else if (weight_lv == 3) { |
| 1468 | 4547 | wbeg = nochar; | |
| 1469 | 4547 | num_of_ce_left = 0; | |
| 1470 |
30/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3081 times.
✓ Branch 17 taken 1203 times.
✓ Branch 18 taken 3075 times.
✓ Branch 19 taken 6 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 3070 times.
✓ Branch 22 taken 1214 times.
✓ Branch 23 taken 3070 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 43 times.
✓ Branch 49 taken 40 times.
✓ Branch 50 taken 43 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 39 times.
✓ Branch 54 taken 44 times.
✓ Branch 55 taken 39 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✓ Branch 89 taken 33 times.
✓ Branch 90 taken 57 times.
✗ Branch 91 not taken.
✓ Branch 92 taken 3 times.
✓ Branch 93 taken 54 times.
✓ Branch 94 taken 36 times.
✓ Branch 95 taken 54 times.
✓ Branch 96 taken 55 times.
✓ Branch 97 taken 35 times.
✓ Branch 98 taken 54 times.
✓ Branch 99 taken 1 times.
✓ Branch 100 taken 3 times.
✓ Branch 101 taken 51 times.
✓ Branch 102 taken 39 times.
✓ Branch 103 taken 51 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
|
7776 | if (is_katakana_char(wc) || is_katakana_iteration(wc) || |
| 1471 | 3229 | is_ja_length_mark(wc)) { | |
| 1472 | 1333 | return JA_KATA_QUAT_WEIGHT; | |
| 1473 |
22/84✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 1398 times.
✓ Branch 13 taken 1672 times.
✓ Branch 14 taken 6 times.
✓ Branch 15 taken 1392 times.
✓ Branch 16 taken 1678 times.
✓ Branch 17 taken 1392 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✓ Branch 36 taken 12 times.
✓ Branch 37 taken 27 times.
✗ Branch 38 not taken.
✓ Branch 39 taken 12 times.
✓ Branch 40 taken 27 times.
✓ Branch 41 taken 12 times.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✓ Branch 66 taken 10 times.
✓ Branch 67 taken 44 times.
✓ Branch 68 taken 1 times.
✓ Branch 69 taken 9 times.
✓ Branch 70 taken 45 times.
✓ Branch 71 taken 9 times.
✓ Branch 72 taken 9 times.
✓ Branch 73 taken 42 times.
✗ Branch 74 not taken.
✓ Branch 75 taken 9 times.
✓ Branch 76 taken 42 times.
✓ Branch 77 taken 9 times.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
|
3214 | } else if (is_hiragana_char(wc) || is_hiragana_iteration(wc)) { |
| 1474 | 1792 | return JA_HIRA_QUAT_WEIGHT; | |
| 1475 | } | ||
| 1476 | 1422 | return -1; | |
| 1477 | } | ||
| 1478 | 10032336 | return 0; | |
| 1479 | } | ||
| 1480 | |||
| 1481 | // Generic version that can handle any number of levels. | ||
| 1482 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1483 | ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next_raw() { | ||
| 1484 | 20171736869 | int remain_weight = more_weight(); | |
| 1485 |
36/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 3577324 times.
✓ Branch 17 taken 20053073 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 103283319 times.
✓ Branch 21 taken 600846594 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 20040882 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 13362597 times.
✓ Branch 28 taken 92111265 times.
✓ Branch 29 taken 186919638 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 94401968 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 2610 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 108 times.
✓ Branch 53 taken 61983 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 459 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 56 times.
✓ Branch 60 taken 45 times.
✓ Branch 61 taken 19281 times.
✗ Branch 62 not taken.
✓ Branch 63 taken 375695071 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 67 times.
✓ Branch 89 taken 659 times.
✓ Branch 90 taken 67 times.
✓ Branch 91 taken 659 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 297 times.
✓ Branch 95 taken 8755 times.
✓ Branch 96 taken 279 times.
✓ Branch 97 taken 8773 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 96 times.
✓ Branch 100 taken 93 times.
✓ Branch 101 taken 16113 times.
✓ Branch 102 taken 93 times.
✓ Branch 103 taken 16113 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 8214 times.
✓ Branch 107 taken 19130533695 times.
✓ Branch 108 taken 8258 times.
✓ Branch 109 taken 19177966079 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 4116 times.
|
39818948755 | if (remain_weight >= 0) return remain_weight; |
| 1486 | |||
| 1487 | do { | ||
| 1488 | 39712045605 | my_wc_t wc = 0; | |
| 1489 | |||
| 1490 | /* Get next code point */ | ||
| 1491 |
44/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10009965 times.
✓ Branch 17 taken 10053611 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 300260109 times.
✓ Branch 21 taken 300859818 times.
✓ Branch 22 taken 10013990 times.
✓ Branch 23 taken 10036096 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6677229 times.
✓ Branch 27 taken 6692036 times.
✓ Branch 28 taken 93427849 times.
✓ Branch 29 taken 93606617 times.
✓ Branch 30 taken 39165543 times.
✓ Branch 31 taken 55241660 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 146 times.
✓ Branch 49 taken 2476 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 270 times.
✓ Branch 53 taken 61713 times.
✓ Branch 54 taken 153 times.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✓ Branch 58 taken 28 times.
✓ Branch 59 taken 28 times.
✓ Branch 60 taken 84 times.
✓ Branch 61 taken 19197 times.
✓ Branch 62 taken 151912324 times.
✓ Branch 63 taken 223783187 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 162 times.
✓ Branch 89 taken 506 times.
✓ Branch 90 taken 163 times.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 2119 times.
✓ Branch 95 taken 6642 times.
✓ Branch 96 taken 2172 times.
✓ Branch 97 taken 6601 times.
✓ Branch 98 taken 39 times.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 1816 times.
✓ Branch 101 taken 14298 times.
✓ Branch 102 taken 1825 times.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 368824991 times.
✓ Branch 107 taken 18795301959 times.
✓ Branch 108 taken 367868608 times.
✓ Branch 109 taken 18868170303 times.
✓ Branch 110 taken 1599 times.
✓ Branch 111 taken 2517 times.
|
39712045605 | int mblen = mb_wc(&wc, sbeg, send); |
| 1492 |
44/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10028397 times.
✓ Branch 17 taken 10035179 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 300813069 times.
✓ Branch 21 taken 300306858 times.
✓ Branch 22 taken 10032422 times.
✓ Branch 23 taken 10017664 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6689517 times.
✓ Branch 27 taken 6679748 times.
✓ Branch 28 taken 93599881 times.
✓ Branch 29 taken 93434585 times.
✓ Branch 30 taken 39171687 times.
✓ Branch 31 taken 55235516 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 146 times.
✓ Branch 49 taken 2476 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 270 times.
✓ Branch 53 taken 61713 times.
✓ Branch 54 taken 153 times.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✓ Branch 58 taken 28 times.
✓ Branch 59 taken 28 times.
✓ Branch 60 taken 84 times.
✓ Branch 61 taken 19197 times.
✓ Branch 62 taken 151912310 times.
✓ Branch 63 taken 223783201 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 162 times.
✓ Branch 89 taken 506 times.
✓ Branch 90 taken 163 times.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 2119 times.
✓ Branch 95 taken 6642 times.
✓ Branch 96 taken 2172 times.
✓ Branch 97 taken 6601 times.
✓ Branch 98 taken 39 times.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 1816 times.
✓ Branch 101 taken 14298 times.
✓ Branch 102 taken 1825 times.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 368811613 times.
✓ Branch 107 taken 18795315337 times.
✓ Branch 108 taken 367858504 times.
✓ Branch 109 taken 18868180407 times.
✓ Branch 110 taken 1599 times.
✓ Branch 111 taken 2517 times.
|
39712045605 | if (mblen <= 0) { |
| 1493 | if (LEVELS_FOR_COMPARE == 1) { | ||
| 1494 | 1021355678 | ++weight_lv; | |
| 1495 | 1757075983 | return -1; | |
| 1496 | } | ||
| 1497 | |||
| 1498 |
30/84✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 10027641 times.
✓ Branch 13 taken 756 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 200542232 times.
✓ Branch 17 taken 100270837 times.
✓ Branch 18 taken 6688324 times.
✓ Branch 19 taken 3344098 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 3344797 times.
✓ Branch 23 taken 3344720 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✓ Branch 36 taken 114 times.
✓ Branch 37 taken 32 times.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 180 times.
✓ Branch 41 taken 90 times.
✓ Branch 42 taken 102 times.
✓ Branch 43 taken 51 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✓ Branch 46 taken 14 times.
✓ Branch 47 taken 14 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✓ Branch 66 taken 131 times.
✓ Branch 67 taken 31 times.
✓ Branch 68 taken 132 times.
✓ Branch 69 taken 31 times.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✓ Branch 72 taken 1866 times.
✓ Branch 73 taken 253 times.
✓ Branch 74 taken 1914 times.
✓ Branch 75 taken 258 times.
✓ Branch 76 taken 30 times.
✓ Branch 77 taken 9 times.
✓ Branch 78 taken 931 times.
✓ Branch 79 taken 885 times.
✓ Branch 80 taken 940 times.
✓ Branch 81 taken 885 times.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
|
327572298 | if (++weight_lv < LEVELS_FOR_COMPARE) { |
| 1499 |
4/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 10027641 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 114 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 131 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 132 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
10028018 | if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) { |
| 1500 | // Return directly if we don't have quaternary weight. | ||
| 1501 |
16/56✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 3342459 times.
✓ Branch 9 taken 6685182 times.
✓ Branch 10 taken 3341646 times.
✓ Branch 11 taken 813 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 38 times.
✓ Branch 25 taken 76 times.
✓ Branch 26 taken 6 times.
✓ Branch 27 taken 32 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✓ Branch 44 taken 43 times.
✓ Branch 45 taken 88 times.
✓ Branch 46 taken 3 times.
✓ Branch 47 taken 40 times.
✓ Branch 48 taken 43 times.
✓ Branch 49 taken 89 times.
✓ Branch 50 taken 3 times.
✓ Branch 51 taken 40 times.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
|
10028018 | if (weight_lv == 3 && !has_quaternary_weight) return -1; |
| 1502 | } | ||
| 1503 | /* | ||
| 1504 | Restart scanning from the beginning of the string, and add | ||
| 1505 | a level separator. | ||
| 1506 | */ | ||
| 1507 | 217267690 | sbeg = sbeg_dup; | |
| 1508 | 217267690 | return 0; | |
| 1509 | } | ||
| 1510 | |||
| 1511 | // If we don't have any more levels left, we're done. | ||
| 1512 | 106962950 | return -1; | |
| 1513 | } | ||
| 1514 | |||
| 1515 | 38363117629 | sbeg += mblen; | |
| 1516 |
22/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 10035179 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✓ Branch 21 taken 300306858 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 10017664 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 6679748 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 93434585 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 55235516 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2476 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 61713 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 19197 times.
✗ Branch 62 not taken.
✓ Branch 63 taken 223783201 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 506 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 505 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 6642 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 6601 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✗ Branch 100 not taken.
✓ Branch 101 taken 14298 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✓ Branch 107 taken 18795315337 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 18868180407 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
|
38363117629 | assert(wc <= uca->maxchar); // mb_wc() has already checked this. |
| 1517 | |||
| 1518 |
31/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10035179 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 130159215 times.
✓ Branch 21 taken 170147643 times.
✗ Branch 22 not taken.
✓ Branch 23 taken 10017664 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 6679748 times.
✓ Branch 28 taken 36705833 times.
✓ Branch 29 taken 56728752 times.
✓ Branch 30 taken 604 times.
✓ Branch 31 taken 55235078 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 2476 times.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 26676 times.
✓ Branch 53 taken 35037 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✓ Branch 60 taken 7518 times.
✓ Branch 61 taken 11679 times.
✓ Branch 62 taken 18 times.
✓ Branch 63 taken 223782848 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 506 times.
✗ Branch 89 not taken.
✓ Branch 90 taken 505 times.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 121 times.
✓ Branch 95 taken 6521 times.
✓ Branch 96 taken 121 times.
✓ Branch 97 taken 6480 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✗ Branch 100 not taken.
✓ Branch 101 taken 14298 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14288 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 51445150 times.
✓ Branch 107 taken 18750560320 times.
✗ Branch 108 not taken.
✓ Branch 109 taken 18815643420 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
|
38363117629 | if (my_uca_have_contractions(uca)) { |
| 1519 | const uint16 *cweight; | ||
| 1520 | /* | ||
| 1521 | If we have scanned a code point which can have previous context, | ||
| 1522 | and there were some more code points already before, | ||
| 1523 | then verify that {prev_char, wc} together form | ||
| 1524 | a real previous context pair. | ||
| 1525 | Note, we support only 2-character long sequences with previous | ||
| 1526 | context at the moment. CLDR does not have longer sequences. | ||
| 1527 | CLDR doesn't have previous context rule whose first character is | ||
| 1528 | 0x0000, so the initial value (0) of prev_char won't break the logic. | ||
| 1529 | */ | ||
| 1530 |
12/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 1613 times.
✓ Branch 17 taken 12263 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 639 times.
✓ Branch 21 taken 12240 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 56 times.
✓ Branch 49 taken 16 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 57 times.
✓ Branch 89 taken 16 times.
✓ Branch 90 taken 57 times.
✓ Branch 91 taken 21 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 3 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 3 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
|
213602186 | if (my_uca_can_be_previous_context_tail(uca->contraction_flags, wc) && |
| 1531 | 26984 | my_uca_can_be_previous_context_head(uca->contraction_flags, | |
| 1532 |
36/224✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 13876 times.
✓ Branch 33 taken 10021303 times.
✓ Branch 34 taken 1613 times.
✓ Branch 35 taken 10033566 times.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 12879 times.
✓ Branch 41 taken 130146336 times.
✓ Branch 42 taken 639 times.
✓ Branch 43 taken 130158576 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✓ Branch 57 taken 36705833 times.
✗ Branch 58 not taken.
✓ Branch 59 taken 36705833 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✓ Branch 96 taken 72 times.
✓ Branch 97 taken 2404 times.
✓ Branch 98 taken 56 times.
✓ Branch 99 taken 2420 times.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✓ Branch 105 taken 26676 times.
✗ Branch 106 not taken.
✓ Branch 107 taken 26676 times.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✓ Branch 121 taken 7518 times.
✗ Branch 122 not taken.
✓ Branch 123 taken 7518 times.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✓ Branch 176 taken 73 times.
✓ Branch 177 taken 433 times.
✓ Branch 178 taken 57 times.
✓ Branch 179 taken 449 times.
✓ Branch 180 taken 78 times.
✓ Branch 181 taken 427 times.
✓ Branch 182 taken 57 times.
✓ Branch 183 taken 448 times.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✓ Branch 188 taken 3 times.
✓ Branch 189 taken 118 times.
✗ Branch 190 not taken.
✓ Branch 191 taken 121 times.
✓ Branch 192 taken 3 times.
✓ Branch 193 taken 118 times.
✗ Branch 194 not taken.
✓ Branch 195 taken 121 times.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✓ Branch 213 taken 5140 times.
✗ Branch 214 not taken.
✓ Branch 215 taken 5140 times.
✗ Branch 216 not taken.
✓ Branch 217 taken 5231 times.
✗ Branch 218 not taken.
✓ Branch 219 taken 5231 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
|
176975505 | prev_char) && |
| 1533 |
10/224✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✓ Branch 32 taken 1613 times.
✗ Branch 33 not taken.
✓ Branch 34 taken 1613 times.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 639 times.
✗ Branch 41 not taken.
✓ Branch 42 taken 639 times.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✓ Branch 96 taken 56 times.
✗ Branch 97 not taken.
✓ Branch 98 taken 56 times.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✓ Branch 176 taken 57 times.
✗ Branch 177 not taken.
✓ Branch 178 taken 57 times.
✗ Branch 179 not taken.
✓ Branch 180 taken 57 times.
✗ Branch 181 not taken.
✓ Branch 182 taken 57 times.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✗ Branch 212 not taken.
✗ Branch 213 not taken.
✗ Branch 214 not taken.
✗ Branch 215 not taken.
✗ Branch 216 not taken.
✗ Branch 217 not taken.
✗ Branch 218 not taken.
✗ Branch 219 not taken.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
|
4844 | (cweight = previous_context_find(prev_char, wc))) { |
| 1534 | // For Japanese kana-sensitive collation. | ||
| 1535 |
4/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1613 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 56 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 57 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 57 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
1783 | if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) { |
| 1536 | 1783 | int quat_wt = handle_ja_contraction_quat_wt(); | |
| 1537 | 1783 | prev_char = 0; | |
| 1538 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 224 times.
✓ Branch 5 taken 1389 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 14 times.
✓ Branch 13 taken 42 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 12 times.
✓ Branch 23 taken 45 times.
✓ Branch 24 taken 12 times.
✓ Branch 25 taken 45 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
1783 | if (quat_wt > 0) return quat_wt; |
| 1539 | } | ||
| 1540 | 2160 | prev_char = 0; /* Clear for the next code point */ | |
| 1541 | 2160 | return *cweight; | |
| 1542 |
18/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 10033566 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 145311 times.
✓ Branch 21 taken 130013265 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 40577 times.
✓ Branch 29 taken 36665256 times.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2420 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 2421 times.
✓ Branch 53 taken 24255 times.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 717 times.
✓ Branch 61 taken 6801 times.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 449 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 448 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✓ Branch 95 taken 121 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 121 times.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 354 times.
✓ Branch 107 taken 4786 times.
✓ Branch 108 taken 312 times.
✓ Branch 109 taken 4919 times.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
|
176946099 | } else if (my_uca_can_be_contraction_head(uca->contraction_flags, wc)) { |
| 1543 | /* Check if wc starts a contraction */ | ||
| 1544 | size_t chars_skipped; // Ignored. | ||
| 1545 |
18/224✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 145311 times.
✗ Branch 41 not taken.
✓ Branch 42 taken 402 times.
✓ Branch 43 taken 144909 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✓ Branch 56 taken 40577 times.
✗ Branch 57 not taken.
✓ Branch 58 taken 139 times.
✓ Branch 59 taken 40438 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✓ Branch 104 taken 2421 times.
✗ Branch 105 not taken.
✓ Branch 106 taken 108 times.
✓ Branch 107 taken 2313 times.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✗ Branch 112 not taken.
✗ Branch 113 not taken.
✗ Branch 114 not taken.
✗ Branch 115 not taken.
✗ Branch 116 not taken.
✗ Branch 117 not taken.
✗ Branch 118 not taken.
✗ Branch 119 not taken.
✓ Branch 120 taken 717 times.
✗ Branch 121 not taken.
✓ Branch 122 taken 36 times.
✓ Branch 123 taken 681 times.
✗ Branch 124 not taken.
✗ Branch 125 not taken.
✗ Branch 126 not taken.
✗ Branch 127 not taken.
✗ Branch 128 not taken.
✗ Branch 129 not taken.
✗ Branch 130 not taken.
✗ Branch 131 not taken.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✗ Branch 148 not taken.
✗ Branch 149 not taken.
✗ Branch 150 not taken.
✗ Branch 151 not taken.
✗ Branch 152 not taken.
✗ Branch 153 not taken.
✗ Branch 154 not taken.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
✗ Branch 160 not taken.
✗ Branch 161 not taken.
✗ Branch 162 not taken.
✗ Branch 163 not taken.
✗ Branch 164 not taken.
✗ Branch 165 not taken.
✗ Branch 166 not taken.
✗ Branch 167 not taken.
✗ Branch 168 not taken.
✗ Branch 169 not taken.
✗ Branch 170 not taken.
✗ Branch 171 not taken.
✗ Branch 172 not taken.
✗ Branch 173 not taken.
✗ Branch 174 not taken.
✗ Branch 175 not taken.
✗ Branch 176 not taken.
✗ Branch 177 not taken.
✗ Branch 178 not taken.
✗ Branch 179 not taken.
✗ Branch 180 not taken.
✗ Branch 181 not taken.
✗ Branch 182 not taken.
✗ Branch 183 not taken.
✗ Branch 184 not taken.
✗ Branch 185 not taken.
✗ Branch 186 not taken.
✗ Branch 187 not taken.
✗ Branch 188 not taken.
✗ Branch 189 not taken.
✗ Branch 190 not taken.
✗ Branch 191 not taken.
✗ Branch 192 not taken.
✗ Branch 193 not taken.
✗ Branch 194 not taken.
✗ Branch 195 not taken.
✗ Branch 196 not taken.
✗ Branch 197 not taken.
✗ Branch 198 not taken.
✗ Branch 199 not taken.
✗ Branch 200 not taken.
✗ Branch 201 not taken.
✗ Branch 202 not taken.
✗ Branch 203 not taken.
✗ Branch 204 not taken.
✗ Branch 205 not taken.
✗ Branch 206 not taken.
✗ Branch 207 not taken.
✗ Branch 208 not taken.
✗ Branch 209 not taken.
✗ Branch 210 not taken.
✗ Branch 211 not taken.
✓ Branch 212 taken 354 times.
✗ Branch 213 not taken.
✓ Branch 214 taken 104 times.
✓ Branch 215 taken 250 times.
✓ Branch 216 taken 312 times.
✗ Branch 217 not taken.
✓ Branch 218 taken 101 times.
✓ Branch 219 taken 211 times.
✗ Branch 220 not taken.
✗ Branch 221 not taken.
✗ Branch 222 not taken.
✗ Branch 223 not taken.
|
189692 | if ((cweight = contraction_find(wc, &chars_skipped))) return *cweight; |
| 1546 | } | ||
| 1547 | 176945209 | prev_char = wc; | |
| 1548 | } | ||
| 1549 | |||
| 1550 | // For Japanese kana-sensitive collation. | ||
| 1551 |
4/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 10033566 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 2420 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 449 times.
✗ Branch 23 not taken.
✓ Branch 24 taken 448 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
10036883 | if (LEVELS_FOR_COMPARE == 4 && cs->coll_param == &ja_coll_param) { |
| 1552 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3345121 times.
✓ Branch 5 taken 6688445 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 779 times.
✓ Branch 13 taken 1641 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 134 times.
✓ Branch 23 taken 315 times.
✓ Branch 24 taken 133 times.
✓ Branch 25 taken 315 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
10036883 | int quat_wt = handle_ja_common_quat_wt(wc); |
| 1553 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1392 times.
✓ Branch 5 taken 10032174 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 2408 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 9 times.
✓ Branch 23 taken 440 times.
✓ Branch 24 taken 9 times.
✓ Branch 25 taken 439 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
10036883 | if (quat_wt == -1) |
| 1554 | 1422 | continue; | |
| 1555 |
8/28✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 2892 times.
✓ Branch 5 taken 10029282 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 71 times.
✓ Branch 13 taken 2337 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✓ Branch 22 taken 81 times.
✓ Branch 23 taken 359 times.
✓ Branch 24 taken 81 times.
✓ Branch 25 taken 358 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
|
10035461 | else if (quat_wt) |
| 1556 | 3125 | return quat_wt; | |
| 1557 | } | ||
| 1558 | /* Process single code point */ | ||
| 1559 | 38265827346 | uint page = wc >> 8; | |
| 1560 | 38265827346 | uint code = wc & 0xFF; | |
| 1561 | |||
| 1562 | /* If weight page for wc does not exist, then calculate algorithmically */ | ||
| 1563 | 38265827346 | const uint16 *wpage = uca->weights[page]; | |
| 1564 |
35/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 9476838 times.
✓ Branch 17 taken 552444 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 289097553 times.
✓ Branch 21 taken 11208264 times.
✓ Branch 22 taken 9665721 times.
✓ Branch 23 taken 351943 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6444000 times.
✓ Branch 27 taken 235748 times.
✓ Branch 28 taken 90209280 times.
✓ Branch 29 taken 3225166 times.
✓ Branch 30 taken 3228772 times.
✓ Branch 31 taken 52006306 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✓ Branch 49 taken 2337 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 61605 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 19161 times.
✓ Branch 62 taken 26 times.
✓ Branch 63 taken 223782822 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✓ Branch 89 taken 359 times.
✗ Branch 90 not taken.
✓ Branch 91 taken 358 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 142 times.
✓ Branch 95 taken 6500 times.
✓ Branch 96 taken 119 times.
✓ Branch 97 taken 6482 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 138 times.
✓ Branch 101 taken 14160 times.
✓ Branch 102 taken 117 times.
✓ Branch 103 taken 14171 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 8035 times.
✓ Branch 107 taken 18750557321 times.
✓ Branch 108 taken 10829 times.
✓ Branch 109 taken 18815637721 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
|
38673968916 | if (!wpage) return next_implicit(wc); |
| 1565 | |||
| 1566 | /* Calculate pointer to wc's weight, using page and offset */ | ||
| 1567 | 37857685776 | wbeg = UCA900_WEIGHT_ADDR(wpage, weight_lv, code); | |
| 1568 | 37857685776 | wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 1569 | 555498 | num_of_ce_left = UCA900_NUM_OF_CE(wpage, code); | |
| 1570 |
36/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 10503 times.
✓ Branch 17 taken 543333 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 273333 times.
✓ Branch 21 taken 10934931 times.
✓ Branch 22 taken 9204 times.
✓ Branch 23 taken 342739 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 6668 times.
✓ Branch 27 taken 229080 times.
✓ Branch 28 taken 114828 times.
✓ Branch 29 taken 3110338 times.
✓ Branch 30 taken 5235 times.
✓ Branch 31 taken 52001071 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 12 times.
✓ Branch 49 taken 2337 times.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✓ Branch 53 taken 61605 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 306 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 28 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 19161 times.
✓ Branch 62 taken 440 times.
✓ Branch 63 taken 223782382 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✓ Branch 88 taken 9 times.
✓ Branch 89 taken 359 times.
✓ Branch 90 taken 9 times.
✓ Branch 91 taken 358 times.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 6 times.
✓ Branch 95 taken 6494 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 6482 times.
✗ Branch 98 not taken.
✓ Branch 99 taken 57 times.
✓ Branch 100 taken 1 times.
✓ Branch 101 taken 14159 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 14171 times.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✓ Branch 106 taken 33593255 times.
✓ Branch 107 taken 18716964066 times.
✓ Branch 108 taken 58072832 times.
✓ Branch 109 taken 18757564889 times.
✗ Branch 110 not taken.
✓ Branch 111 taken 2517 times.
|
37857687198 | } while (!wbeg[0]); /* Skip ignorable code points */ |
| 1571 | |||
| 1572 | 37765600863 | uint16 rtn = *wbeg; | |
| 1573 | 37765600863 | wbeg += wbeg_stride; | |
| 1574 | 37765600863 | --num_of_ce_left; | |
| 1575 | 37765600863 | return rtn; | |
| 1576 | } | ||
| 1577 | |||
| 1578 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1579 | template <class T, class U> | ||
| 1580 | ALWAYS_INLINE void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::for_each_weight( | ||
| 1581 | T func, U preaccept_data) { | ||
| 1582 |
13/64✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 3344224 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 3344224 times.
✓ Branch 24 taken 3344870 times.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 3344870 times.
✓ Branch 28 taken 43555055 times.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 43555064 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✓ Branch 52 taken 51 times.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✓ Branch 55 taken 51 times.
✓ Branch 56 taken 14 times.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✓ Branch 59 taken 14 times.
✓ Branch 60 taken 151912063 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 105 times.
✓ Branch 63 taken 151911958 times.
|
202156217 | if (cs->tailoring || cs->mbminlen != 1 || cs->coll_param) { |
| 1583 | // Slower, generic path. | ||
| 1584 | int s_res; | ||
| 1585 |
12/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 20287995 times.
✓ Branch 9 taken 3342402 times.
✓ Branch 10 taken 603859076 times.
✓ Branch 11 taken 100270837 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✓ Branch 14 taken 185431022 times.
✓ Branch 15 taken 93599881 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 2628 times.
✓ Branch 25 taken 38 times.
✓ Branch 26 taken 62001 times.
✓ Branch 27 taken 90 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 19242 times.
✓ Branch 31 taken 84 times.
|
1204089331 | while ((s_res = next()) >= 0) { |
| 1586 |
11/48✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 20287995 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 333 times.
✓ Branch 19 taken 20287662 times.
✓ Branch 20 taken 603859076 times.
✗ Branch 21 not taken.
✓ Branch 22 taken 498 times.
✓ Branch 23 taken 603858578 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 185431022 times.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 185431022 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✓ Branch 41 taken 2628 times.
✗ Branch 42 not taken.
✓ Branch 43 taken 62001 times.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✓ Branch 47 taken 19242 times.
|
809661964 | if (!func(s_res, s_res == 0)) return; |
| 1587 | } | ||
| 1588 | 197213332 | return; | |
| 1589 | } | ||
| 1590 | |||
| 1591 | /* | ||
| 1592 | Fast path. TODO: See if we can accept some character sets | ||
| 1593 | with tailorings. | ||
| 1594 | */ | ||
| 1595 | 202156181 | const uint16 *ascii_wpage = | |
| 1596 | 202156181 | UCA900_WEIGHT_ADDR(uca->weights[0], /*level=*/weight_lv, /*subcode=*/0); | |
| 1597 | |||
| 1598 | /* | ||
| 1599 | Precalculate the limit for the fast path below, taking care not to form | ||
| 1600 | pointers that are before sbeg, as those cannot be legally compared. | ||
| 1601 | (In particular, this catches the case of sbeg == send == nullptr.) | ||
| 1602 | */ | ||
| 1603 |
12/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 3145988 times.
✓ Branch 11 taken 198236 times.
✓ Branch 12 taken 3146566 times.
✓ Branch 13 taken 198304 times.
✓ Branch 14 taken 34434167 times.
✓ Branch 15 taken 9120897 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 28 times.
✓ Branch 27 taken 23 times.
✓ Branch 28 taken 6 times.
✓ Branch 29 taken 8 times.
✓ Branch 30 taken 124853839 times.
✓ Branch 31 taken 27058119 times.
|
202156181 | const uchar *send_local = (send - sbeg > 3) ? (send - 3) : sbeg; |
| 1604 | |||
| 1605 | 301346658 | for (;;) { | |
| 1606 | /* | ||
| 1607 | We could have more weights left from the previous call to next() | ||
| 1608 | (if any) that we need to deal with. | ||
| 1609 | */ | ||
| 1610 | int s_res; | ||
| 1611 |
11/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 3444795 times.
✓ Branch 11 taken 20040882 times.
✓ Branch 12 taken 3367697 times.
✓ Branch 13 taken 13362597 times.
✓ Branch 14 taken 3296700 times.
✓ Branch 15 taken 94403414 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 4 times.
✓ Branch 27 taken 459 times.
✓ Branch 28 taken 2 times.
✓ Branch 29 taken 56 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 375695147 times.
|
513612024 | while ((s_res = more_weight()) >= 0) { |
| 1612 |
11/48✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 3444795 times.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✓ Branch 23 taken 3444795 times.
✓ Branch 24 taken 3367697 times.
✗ Branch 25 not taken.
✓ Branch 26 taken 24 times.
✓ Branch 27 taken 3367673 times.
✓ Branch 28 taken 3296694 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 9 times.
✓ Branch 31 taken 3296685 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✓ Branch 43 taken 4 times.
✗ Branch 44 not taken.
✓ Branch 45 taken 2 times.
✗ Branch 46 not taken.
✓ Branch 47 taken 26 times.
|
10109069 | if (!func(s_res, s_res == 0)) return; |
| 1613 | } | ||
| 1614 | |||
| 1615 | /* | ||
| 1616 | Loop in a simple fast path as long as we only have non-ignorable | ||
| 1617 | ASCII characters. These characters always have exactly a single weight | ||
| 1618 | and consist of only a single byte, so we can skip a lot of the checks | ||
| 1619 | we'd otherwise have to do. | ||
| 1620 | */ | ||
| 1621 | 503502555 | const uchar *sbeg_local = sbeg; | |
| 1622 |
33/96✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✓ Branch 30 taken 9441582 times.
✓ Branch 31 taken 10601019 times.
✓ Branch 32 taken 9441349 times.
✓ Branch 33 taken 233 times.
✓ Branch 34 taken 9441349 times.
✓ Branch 35 taken 10601252 times.
✓ Branch 36 taken 6296078 times.
✓ Branch 37 taken 7068789 times.
✓ Branch 38 taken 6295751 times.
✓ Branch 39 taken 327 times.
✓ Branch 40 taken 6295751 times.
✓ Branch 41 taken 7069116 times.
✓ Branch 42 taken 297507442 times.
✓ Branch 43 taken 88162919 times.
✓ Branch 44 taken 294430502 times.
✓ Branch 45 taken 3076873 times.
✓ Branch 46 taken 294430502 times.
✓ Branch 47 taken 91239792 times.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✓ Branch 78 taken 606 times.
✓ Branch 79 taken 402 times.
✓ Branch 80 taken 606 times.
✗ Branch 81 not taken.
✓ Branch 82 taken 606 times.
✓ Branch 83 taken 402 times.
✓ Branch 84 taken 348 times.
✓ Branch 85 taken 50 times.
✓ Branch 86 taken 348 times.
✗ Branch 87 not taken.
✓ Branch 88 taken 348 times.
✓ Branch 89 taken 50 times.
✓ Branch 90 taken 473259778 times.
✓ Branch 91 taken 375694112 times.
✓ Branch 92 taken 473260155 times.
✗ Branch 93 not taken.
✓ Branch 94 taken 473260265 times.
✓ Branch 95 taken 375693910 times.
|
1268033125 | while (sbeg_local < send_local && preaccept_data(sizeof(uint32))) { |
| 1623 | /* | ||
| 1624 | Check if all four bytes are in the range 0x20..0x7e, inclusive. | ||
| 1625 | These have exactly one weight. Note that this unfortunately does not | ||
| 1626 | include tab and newline, which would otherwise be legal candidates. | ||
| 1627 | |||
| 1628 | See the FastOutOfRange unit test for verification that the bitfiddling | ||
| 1629 | trick used here is correct. | ||
| 1630 | */ | ||
| 1631 | uint32 four_bytes; | ||
| 1632 | 783428821 | memcpy(&four_bytes, sbeg_local, sizeof(four_bytes)); | |
| 1633 |
12/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✓ Branch 11 taken 9439630 times.
✓ Branch 12 taken 2270 times.
✓ Branch 13 taken 6293481 times.
✓ Branch 14 taken 291274850 times.
✓ Branch 15 taken 3155652 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 549 times.
✓ Branch 27 taken 57 times.
✓ Branch 28 taken 342 times.
✓ Branch 29 taken 6 times.
✓ Branch 30 taken 473259061 times.
✓ Branch 31 taken 1204 times.
|
783428821 | if (((four_bytes + 0x01010101u) & 0x80808080) || |
| 1634 |
7/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✓ Branch 15 taken 7903 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 549 times.
✗ Branch 27 not taken.
✓ Branch 28 taken 342 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 473259130 times.
✗ Branch 31 not taken.
|
764538791 | ((four_bytes - 0x20202020u) & 0x80808080)) |
| 1635 | break; | ||
| 1636 | 764530957 | const int s_res0 = ascii_wpage[sbeg_local[0]]; | |
| 1637 | 764530957 | const int s_res1 = ascii_wpage[sbeg_local[1]]; | |
| 1638 | 764530957 | const int s_res2 = ascii_wpage[sbeg_local[2]]; | |
| 1639 | 764530957 | const int s_res3 = ascii_wpage[sbeg_local[3]]; | |
| 1640 |
6/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
|
764530957 | assert(s_res0 != 0); |
| 1641 |
6/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
|
764530957 | assert(s_res1 != 0); |
| 1642 |
6/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
|
764530957 | assert(s_res2 != 0); |
| 1643 |
6/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1719 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 2270 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 291266947 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✓ Branch 27 taken 549 times.
✗ Branch 28 not taken.
✓ Branch 29 taken 342 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 473259130 times.
|
764530957 | assert(s_res3 != 0); |
| 1644 |
3/16✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
|
764530957 | func(s_res0, /*is_level_separator=*/false); |
| 1645 |
3/16✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
|
764529713 | func(s_res1, /*is_level_separator=*/false); |
| 1646 |
3/16✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
|
764529965 | func(s_res2, /*is_level_separator=*/false); |
| 1647 |
3/16✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 1719 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 2270 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 291266947 times.
✗ Branch 15 not taken.
|
764530246 | func(s_res3, /*is_level_separator=*/false); |
| 1648 | 764530570 | sbeg_local += sizeof(uint32); | |
| 1649 | } | ||
| 1650 | 503502386 | sbeg = sbeg_local; | |
| 1651 | |||
| 1652 | // Do a single code point in the generic path. | ||
| 1653 | 503500242 | s_res = next_raw(); | |
| 1654 |
10/32✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 6688324 times.
✓ Branch 11 taken 13352558 times.
✓ Branch 12 taken 3344797 times.
✓ Branch 13 taken 10017800 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 94401530 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✓ Branch 26 taken 102 times.
✓ Branch 27 taken 357 times.
✓ Branch 28 taken 14 times.
✓ Branch 29 taken 42 times.
✗ Branch 30 not taken.
✓ Branch 31 taken 375694718 times.
|
503500242 | if (s_res == 0) { |
| 1655 | // Level separator, so we have to update our page pointer. | ||
| 1656 | 10033237 | ascii_wpage += UCA900_DISTANCE_BETWEEN_LEVELS; | |
| 1657 | } | ||
| 1658 |
37/112✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✓ Branch 40 taken 16696784 times.
✓ Branch 41 taken 3344098 times.
✓ Branch 42 taken 16696784 times.
✗ Branch 43 not taken.
✓ Branch 44 taken 126 times.
✓ Branch 45 taken 16696658 times.
✓ Branch 46 taken 3344224 times.
✓ Branch 47 taken 16696658 times.
✓ Branch 48 taken 10017877 times.
✓ Branch 49 taken 3344720 times.
✓ Branch 50 taken 10017877 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 126 times.
✓ Branch 53 taken 10017751 times.
✓ Branch 54 taken 3344846 times.
✓ Branch 55 taken 10017751 times.
✓ Branch 56 taken 55231120 times.
✓ Branch 57 taken 39170410 times.
✓ Branch 58 taken 55230804 times.
✗ Branch 59 not taken.
✓ Branch 60 taken 4381979 times.
✓ Branch 61 taken 50848825 times.
✓ Branch 62 taken 43552402 times.
✓ Branch 63 taken 50848812 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✓ Branch 94 taken 408 times.
✓ Branch 95 taken 51 times.
✗ Branch 96 not taken.
✓ Branch 97 taken 408 times.
✓ Branch 98 taken 51 times.
✓ Branch 99 taken 408 times.
✓ Branch 100 taken 42 times.
✓ Branch 101 taken 14 times.
✗ Branch 102 not taken.
✓ Branch 103 taken 42 times.
✓ Branch 104 taken 14 times.
✓ Branch 105 taken 42 times.
✓ Branch 106 taken 223782760 times.
✓ Branch 107 taken 151911958 times.
✓ Branch 108 taken 320 times.
✓ Branch 109 taken 223782703 times.
✓ Branch 110 taken 151911994 times.
✓ Branch 111 taken 223782987 times.
|
503500242 | if (s_res < 0 || !func(s_res, s_res == 0)) return; |
| 1659 | } | ||
| 1660 | } | ||
| 1661 | |||
| 1662 | /** | ||
| 1663 | Change a weight according to the reorder parameters. | ||
| 1664 | @param weight The weight to change | ||
| 1665 | @retval reordered weight | ||
| 1666 | */ | ||
| 1667 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1668 | 199005436 | uint16 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::apply_reorder_param( | |
| 1669 | uint16 weight) { | ||
| 1670 | /* | ||
| 1671 | Chinese collation's reordering is done in next_implicit() and | ||
| 1672 | modify_all_zh_pages(). See the comment on zh_reorder_param and | ||
| 1673 | change_zh_implicit(). | ||
| 1674 | */ | ||
| 1675 |
2/2✓ Branch 0 taken 6502021 times.
✓ Branch 1 taken 93000697 times.
|
199005436 | if (cs->coll_param == &zh_coll_param) return weight; |
| 1676 | 186001394 | const Reorder_param *param = cs->coll_param->reorder_param; | |
| 1677 |
4/4✓ Branch 0 taken 92630596 times.
✓ Branch 1 taken 370101 times.
✓ Branch 2 taken 787196 times.
✓ Branch 3 taken 91843400 times.
|
186001394 | if (weight >= START_WEIGHT_TO_REORDER && weight <= param->max_weight) { |
| 1678 |
1/2✓ Branch 0 taken 2182167 times.
✗ Branch 1 not taken.
|
4364334 | for (int rec_ind = 0; rec_ind < param->wt_rec_num; ++rec_ind) { |
| 1679 | 4364334 | const Reorder_wt_rec *wt_rec = param->wt_rec + rec_ind; | |
| 1680 |
2/2✓ Branch 0 taken 1569824 times.
✓ Branch 1 taken 612343 times.
|
4364334 | if (weight >= wt_rec->old_wt_bdy.begin && |
| 1681 |
2/2✓ Branch 0 taken 787196 times.
✓ Branch 1 taken 782628 times.
|
3139648 | weight <= wt_rec->old_wt_bdy.end) { |
| 1682 | /* | ||
| 1683 | As commented in adjust_japanese_weight(), if this is a Japanese | ||
| 1684 | collation, for characters whose weight is between Latin and Kana | ||
| 1685 | group, and for the characters whose weight is between Kana and | ||
| 1686 | Han, we need to change their weight to be after all Han | ||
| 1687 | characters. We decide to give them the weights [FB86 0000 0000] | ||
| 1688 | [origin weight] to make sure the new weights are greater than | ||
| 1689 | the maximum implicit weight of Han characters. If this character's | ||
| 1690 | origin weight has more than one non-ignorable primary weight, for | ||
| 1691 | example, [AAAA 0020 0002][BBBB 0020 0002], both AAAA and BBBB need | ||
| 1692 | to be changed. The new weight should be: | ||
| 1693 | [FB86 0000 0000][AAAA 0020 0002][FB86 0000 0000][BBBB 0020 0002]. | ||
| 1694 | */ | ||
| 1695 |
4/4✓ Branch 0 taken 635782 times.
✓ Branch 1 taken 151414 times.
✓ Branch 2 taken 604098 times.
✓ Branch 3 taken 31684 times.
|
1574392 | if (param == &ja_reorder_param && wt_rec->new_wt_bdy.begin == 0) { |
| 1696 | 1208196 | return_origin_weight = !return_origin_weight; | |
| 1697 |
2/2✓ Branch 0 taken 302028 times.
✓ Branch 1 taken 302070 times.
|
1208196 | if (return_origin_weight) break; |
| 1698 | |||
| 1699 | /* | ||
| 1700 | We didn't consume the weight; rewind the iterator, so we will | ||
| 1701 | get another call where we can output it. | ||
| 1702 | */ | ||
| 1703 | 604140 | wbeg -= wbeg_stride; | |
| 1704 | 604140 | ++num_of_ce_left; | |
| 1705 | 604140 | return 0xFB86; | |
| 1706 | } | ||
| 1707 | |||
| 1708 | // Regular (non-Japanese-specific) reordering. | ||
| 1709 | 366196 | return weight - wt_rec->old_wt_bdy.begin + wt_rec->new_wt_bdy.begin; | |
| 1710 | } | ||
| 1711 | } | ||
| 1712 | } | ||
| 1713 | 185031058 | return weight; | |
| 1714 | } | ||
| 1715 | |||
| 1716 | // See Unicode TR35 section 3.14.1. | ||
| 1717 | 3415893 | static bool is_tertiary_weight_upper_case(uint16 weight) { | |
| 1718 |
10/10✓ Branch 0 taken 17050 times.
✓ Branch 1 taken 3398843 times.
✓ Branch 2 taken 8787 times.
✓ Branch 3 taken 8263 times.
✓ Branch 4 taken 3407405 times.
✓ Branch 5 taken 225 times.
✓ Branch 6 taken 3407168 times.
✓ Branch 7 taken 237 times.
✓ Branch 8 taken 3406829 times.
✓ Branch 9 taken 339 times.
|
3415893 | if ((weight >= 0x08 && weight <= 0x0C) || weight == 0x0E || weight == 0x11 || |
| 1719 |
2/2✓ Branch 0 taken 684 times.
✓ Branch 1 taken 3406145 times.
|
3406829 | weight == 0x12 || weight == 0x1D) |
| 1720 | 9748 | return true; | |
| 1721 | 3406145 | return false; | |
| 1722 | } | ||
| 1723 | |||
| 1724 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1725 | 26889090 | uint16 uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::apply_case_first( | |
| 1726 | uint16 weight) { | ||
| 1727 | /* | ||
| 1728 | We only apply case weight change here when the character is not tailored. | ||
| 1729 | Tailored character's case weight has been changed in | ||
| 1730 | my_char_weight_put_900(). | ||
| 1731 | Only one collation (Danish) needs to implement [caseFirst upper]. | ||
| 1732 | */ | ||
| 1733 |
5/6✓ Branch 0 taken 13444545 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3410862 times.
✓ Branch 3 taken 10033683 times.
✓ Branch 4 taken 3410555 times.
✓ Branch 5 taken 307 times.
|
26889090 | if (cs->coll_param->case_first == CASE_FIRST_UPPER && weight_lv == 2 && |
| 1734 | weight < 0x20) { | ||
| 1735 |
2/2✓ Branch 0 taken 7079 times.
✓ Branch 1 taken 3403476 times.
|
6821110 | if (is_tertiary_weight_upper_case(weight)) |
| 1736 | 14158 | weight |= CASE_FIRST_UPPER_MASK; | |
| 1737 | else | ||
| 1738 | 6806952 | weight |= CASE_FIRST_LOWER_MASK; | |
| 1739 | } | ||
| 1740 | 26889090 | return weight; | |
| 1741 | } | ||
| 1742 | |||
| 1743 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 1744 | ALWAYS_INLINE int uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::next() { | ||
| 1745 | 39218166089 | int res = next_raw(); | |
| 1746 | 39218166089 | Coll_param *param = cs->coll_param; | |
| 1747 |
56/160✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 13602000 times.
✓ Branch 17 taken 10028397 times.
✓ Branch 18 taken 13602000 times.
✗ Branch 19 not taken.
✓ Branch 20 taken 403316844 times.
✓ Branch 21 taken 300813069 times.
✓ Branch 22 taken 161350512 times.
✓ Branch 23 taken 241966332 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 185431022 times.
✓ Branch 29 taken 93599881 times.
✓ Branch 30 taken 39732257 times.
✓ Branch 31 taken 145698765 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 2520 times.
✓ Branch 49 taken 146 times.
✓ Branch 50 taken 2520 times.
✗ Branch 51 not taken.
✓ Branch 52 taken 61821 times.
✓ Branch 53 taken 270 times.
✓ Branch 54 taken 24732 times.
✓ Branch 55 taken 37089 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 19242 times.
✓ Branch 61 taken 84 times.
✓ Branch 62 taken 4122 times.
✓ Branch 63 taken 15120 times.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 564 times.
✓ Branch 113 taken 162 times.
✓ Branch 114 taken 564 times.
✗ Branch 115 not taken.
✓ Branch 116 taken 563 times.
✓ Branch 117 taken 163 times.
✓ Branch 118 taken 563 times.
✗ Branch 119 not taken.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 6933 times.
✓ Branch 125 taken 2119 times.
✓ Branch 126 taken 472 times.
✓ Branch 127 taken 6461 times.
✓ Branch 128 taken 6880 times.
✓ Branch 129 taken 2172 times.
✓ Branch 130 taken 456 times.
✓ Branch 131 taken 6424 times.
✓ Branch 132 taken 57 times.
✓ Branch 133 taken 39 times.
✗ Branch 134 not taken.
✓ Branch 135 taken 57 times.
✓ Branch 136 taken 14390 times.
✓ Branch 137 taken 1816 times.
✗ Branch 138 not taken.
✓ Branch 139 taken 14390 times.
✓ Branch 140 taken 14381 times.
✓ Branch 141 taken 1825 times.
✗ Branch 142 not taken.
✓ Branch 143 taken 14381 times.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✓ Branch 148 taken 18739260764 times.
✓ Branch 149 taken 346531268 times.
✓ Branch 150 taken 783 times.
✓ Branch 151 taken 18739259981 times.
✓ Branch 152 taken 18752629280 times.
✓ Branch 153 taken 372813301 times.
✓ Branch 154 taken 800 times.
✓ Branch 155 taken 18752628480 times.
✓ Branch 156 taken 2517 times.
✓ Branch 157 taken 1599 times.
✗ Branch 158 not taken.
✓ Branch 159 taken 2517 times.
|
39218166089 | if (res > 0 && param) { |
| 1748 | /* Reorder weight change only on primary level. */ | ||
| 1749 |
36/160✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 13602000 times.
✗ Branch 17 not taken.
✓ Branch 18 taken 6764423 times.
✓ Branch 19 taken 6837577 times.
✓ Branch 20 taken 107580689 times.
✓ Branch 21 taken 53769823 times.
✓ Branch 22 taken 52993719 times.
✓ Branch 23 taken 54586970 times.
✗ Branch 24 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 39732257 times.
✗ Branch 29 not taken.
✓ Branch 30 taken 39732257 times.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✓ Branch 48 taken 2520 times.
✗ Branch 49 not taken.
✓ Branch 50 taken 793 times.
✓ Branch 51 taken 1727 times.
✓ Branch 52 taken 16488 times.
✓ Branch 53 taken 8244 times.
✓ Branch 54 taken 5496 times.
✓ Branch 55 taken 10992 times.
✗ Branch 56 not taken.
✗ Branch 57 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✓ Branch 60 taken 4122 times.
✗ Branch 61 not taken.
✓ Branch 62 taken 4122 times.
✗ Branch 63 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✗ Branch 75 not taken.
✗ Branch 76 not taken.
✗ Branch 77 not taken.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
✗ Branch 80 not taken.
✗ Branch 81 not taken.
✗ Branch 82 not taken.
✗ Branch 83 not taken.
✗ Branch 84 not taken.
✗ Branch 85 not taken.
✗ Branch 86 not taken.
✗ Branch 87 not taken.
✗ Branch 88 not taken.
✗ Branch 89 not taken.
✗ Branch 90 not taken.
✗ Branch 91 not taken.
✗ Branch 92 not taken.
✗ Branch 93 not taken.
✗ Branch 94 not taken.
✗ Branch 95 not taken.
✗ Branch 96 not taken.
✗ Branch 97 not taken.
✗ Branch 98 not taken.
✗ Branch 99 not taken.
✗ Branch 100 not taken.
✗ Branch 101 not taken.
✗ Branch 102 not taken.
✗ Branch 103 not taken.
✗ Branch 104 not taken.
✗ Branch 105 not taken.
✗ Branch 106 not taken.
✗ Branch 107 not taken.
✗ Branch 108 not taken.
✗ Branch 109 not taken.
✗ Branch 110 not taken.
✗ Branch 111 not taken.
✓ Branch 112 taken 564 times.
✗ Branch 113 not taken.
✓ Branch 114 taken 149 times.
✓ Branch 115 taken 415 times.
✓ Branch 116 taken 563 times.
✗ Branch 117 not taken.
✓ Branch 118 taken 148 times.
✓ Branch 119 taken 415 times.
✗ Branch 120 not taken.
✗ Branch 121 not taken.
✗ Branch 122 not taken.
✗ Branch 123 not taken.
✓ Branch 124 taken 41 times.
✓ Branch 125 taken 431 times.
✓ Branch 126 taken 14 times.
✓ Branch 127 taken 27 times.
✓ Branch 128 taken 45 times.
✓ Branch 129 taken 411 times.
✓ Branch 130 taken 14 times.
✓ Branch 131 taken 31 times.
✗ Branch 132 not taken.
✗ Branch 133 not taken.
✗ Branch 134 not taken.
✗ Branch 135 not taken.
✗ Branch 136 not taken.
✗ Branch 137 not taken.
✗ Branch 138 not taken.
✗ Branch 139 not taken.
✗ Branch 140 not taken.
✗ Branch 141 not taken.
✗ Branch 142 not taken.
✗ Branch 143 not taken.
✗ Branch 144 not taken.
✗ Branch 145 not taken.
✗ Branch 146 not taken.
✗ Branch 147 not taken.
✓ Branch 148 taken 783 times.
✗ Branch 149 not taken.
✓ Branch 150 taken 783 times.
✗ Branch 151 not taken.
✓ Branch 152 taken 800 times.
✗ Branch 153 not taken.
✓ Branch 154 taken 800 times.
✗ Branch 155 not taken.
✗ Branch 156 not taken.
✗ Branch 157 not taken.
✗ Branch 158 not taken.
✗ Branch 159 not taken.
|
214719781 | if (param->reorder_param && weight_lv == 0) res = apply_reorder_param(res); |
| 1750 |
16/80✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 13602000 times.
✓ Branch 10 taken 13442191 times.
✓ Branch 11 taken 147908321 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 39732257 times.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 24 not taken.
✓ Branch 25 taken 2520 times.
✓ Branch 26 taken 2061 times.
✓ Branch 27 taken 22671 times.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 30 not taken.
✓ Branch 31 taken 4122 times.
✗ Branch 32 not taken.
✗ Branch 33 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 36 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 39 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 42 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 54 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✓ Branch 57 taken 564 times.
✗ Branch 58 not taken.
✓ Branch 59 taken 563 times.
✗ Branch 60 not taken.
✗ Branch 61 not taken.
✓ Branch 62 taken 151 times.
✓ Branch 63 taken 321 times.
✓ Branch 64 taken 142 times.
✓ Branch 65 taken 314 times.
✗ Branch 66 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 69 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
✗ Branch 72 not taken.
✗ Branch 73 not taken.
✗ Branch 74 not taken.
✓ Branch 75 taken 783 times.
✗ Branch 76 not taken.
✓ Branch 77 taken 800 times.
✗ Branch 78 not taken.
✗ Branch 79 not taken.
|
214719781 | if (param->case_first != CASE_FIRST_OFF) res = apply_case_first(res); |
| 1751 | } | ||
| 1752 | 39218166089 | return res; | |
| 1753 | } | ||
| 1754 | |||
| 1755 | /* | ||
| 1756 | Compares two strings according to the collation | ||
| 1757 | |||
| 1758 | SYNOPSIS: | ||
| 1759 | my_strnncoll_uca() | ||
| 1760 | cs Character set information | ||
| 1761 | s First string | ||
| 1762 | slen First string length | ||
| 1763 | t Second string | ||
| 1764 | tlen Second string length | ||
| 1765 | |||
| 1766 | NOTES: | ||
| 1767 | Initializes two weight scanners and gets weights | ||
| 1768 | corresponding to two strings in a loop. If weights are not | ||
| 1769 | the same at some step then returns their difference. | ||
| 1770 | |||
| 1771 | In the while() comparison these situations are possible: | ||
| 1772 | 1. (s_res>0) and (t_res>0) and (s_res == t_res) | ||
| 1773 | Weights are the same so far, continue comparison | ||
| 1774 | 2. (s_res>0) and (t_res>0) and (s_res!=t_res) | ||
| 1775 | A difference has been found, return. | ||
| 1776 | 3. (s_res>0) and (t_res<0) | ||
| 1777 | We have reached the end of the second string, or found | ||
| 1778 | an illegal multibyte sequence in the second string. | ||
| 1779 | Return a positive number, i.e. the first string is bigger. | ||
| 1780 | 4. (s_res<0) and (t_res>0) | ||
| 1781 | We have reached the end of the first string, or found | ||
| 1782 | an illegal multibyte sequence in the first string. | ||
| 1783 | Return a negative number, i.e. the second string is bigger. | ||
| 1784 | 5. (s_res<0) and (t_res<0) | ||
| 1785 | Both scanners returned -1. It means we have reached | ||
| 1786 | the end-of-string or an illegal sequence in both strings | ||
| 1787 | at the same time. Return 0, strings are equal. | ||
| 1788 | |||
| 1789 | RETURN | ||
| 1790 | Difference between two strings, according to the collation: | ||
| 1791 | 0 - means strings are equal | ||
| 1792 | negative number - means the first string is smaller | ||
| 1793 | positive number - means the first string is bigger | ||
| 1794 | */ | ||
| 1795 | |||
| 1796 | template <class Scanner, int LEVELS_FOR_COMPARE, class Mb_wc> | ||
| 1797 | 10423188002 | static int my_strnncoll_uca(const CHARSET_INFO *cs, const Mb_wc mb_wc, | |
| 1798 | const uchar *s, size_t slen, const uchar *t, | ||
| 1799 | size_t tlen, bool t_is_prefix) { | ||
| 1800 | 10423188002 | Scanner sscanner(mb_wc, cs, s, slen); | |
| 1801 | 10423073348 | Scanner tscanner(mb_wc, cs, t, tlen); | |
| 1802 | 10422907406 | int s_res = 0; | |
| 1803 | 10422907406 | int t_res = 0; | |
| 1804 | |||
| 1805 | /* | ||
| 1806 | We compare 2 strings in same level first. If only string A's scanner | ||
| 1807 | has gone to next level, which means another string, B's weight of | ||
| 1808 | current level is longer than A's. We'll compare B's remaining weights | ||
| 1809 | with space. | ||
| 1810 | */ | ||
| 1811 |
1/2✓ Branch 0 taken 5211497328 times.
✗ Branch 1 not taken.
|
10422913132 | for (uint current_lv = 0; current_lv < LEVELS_FOR_COMPARE; ++current_lv) { |
| 1812 | /* Run the scanners until one of them runs out of current lv */ | ||
| 1813 | do { | ||
| 1814 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 182 times.
|
38171636396 | s_res = sscanner.next(); |
| 1815 | 38250937494 | t_res = tscanner.next(); | |
| 1816 |
4/4✓ Branch 0 taken 13973876528 times.
✓ Branch 1 taken 403196250 times.
✓ Branch 2 taken 13863831101 times.
✓ Branch 3 taken 90007467 times.
|
56661822692 | } while (s_res == t_res && s_res >= 0 && |
| 1817 |
5/6✓ Branch 0 taken 14377072778 times.
✓ Branch 1 taken 4748395969 times.
✓ Branch 2 taken 13953403909 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 13953360344 times.
✓ Branch 5 taken 5211930012 times.
|
95032479324 | sscanner.get_weight_level() == current_lv && |
| 1818 | 27727662202 | tscanner.get_weight_level() == current_lv); | |
| 1819 | |||
| 1820 | /* | ||
| 1821 | Two scanners run to next level at same time, or we found a difference, | ||
| 1822 | or we found an error. | ||
| 1823 | */ | ||
| 1824 |
2/2✓ Branch 0 taken 5207174308 times.
✓ Branch 1 taken 4710188 times.
|
10423860024 | if (sscanner.get_weight_level() == tscanner.get_weight_level()) { |
| 1825 |
4/4✓ Branch 0 taken 366010689 times.
✓ Branch 1 taken 4841163619 times.
✓ Branch 2 taken 2833 times.
✓ Branch 3 taken 366007856 times.
|
10414348616 | if (s_res == t_res && s_res >= 0) continue; |
| 1826 | 10414342950 | break; // Error or inequality found, end. | |
| 1827 | } | ||
| 1828 | |||
| 1829 |
2/2✓ Branch 0 taken 1852950 times.
✓ Branch 1 taken 2857238 times.
|
9420376 | if (tscanner.get_weight_level() > current_lv) { |
| 1830 | // t ran out of weights on this level, and s didn't. | ||
| 1831 |
2/2✓ Branch 0 taken 1638 times.
✓ Branch 1 taken 1851312 times.
|
3705900 | if (t_is_prefix) { |
| 1832 | // Consume the rest of the weights from s. | ||
| 1833 | do { | ||
| 1834 | 8424 | s_res = sscanner.next(); | |
| 1835 |
6/6✓ Branch 0 taken 2604 times.
✓ Branch 1 taken 1608 times.
✓ Branch 2 taken 2574 times.
✓ Branch 3 taken 30 times.
✓ Branch 4 taken 2574 times.
✓ Branch 5 taken 1638 times.
|
8424 | } while (s_res >= 0 && sscanner.get_weight_level() == current_lv); |
| 1836 | |||
| 1837 |
2/2✓ Branch 0 taken 1608 times.
✓ Branch 1 taken 30 times.
|
3276 | if (s_res < 0) break; // Error found, end. |
| 1838 | |||
| 1839 | // s is now also on the next level. Continue comparison. | ||
| 1840 | 60 | continue; | |
| 1841 | } else { | ||
| 1842 | // s is longer than t (and t_prefix isn't set). | ||
| 1843 | 3702624 | return 1; | |
| 1844 | } | ||
| 1845 | } | ||
| 1846 | |||
| 1847 |
1/2✓ Branch 0 taken 2857238 times.
✗ Branch 1 not taken.
|
5714476 | if (sscanner.get_weight_level() > current_lv) { |
| 1848 | // s ran out of weights on this level, and t didn't. | ||
| 1849 | 5714476 | return -1; | |
| 1850 | } | ||
| 1851 | |||
| 1852 | ✗ | break; | |
| 1853 | } | ||
| 1854 | |||
| 1855 | 10414264642 | return (s_res - t_res); | |
| 1856 | } | ||
| 1857 | |||
| 1858 | 974136 | static inline int my_space_weight(const CHARSET_INFO *cs) /* W3-TODO */ | |
| 1859 | { | ||
| 1860 |
2/4✓ Branch 0 taken 974136 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 974136 times.
|
974136 | if (cs->uca && cs->uca->version == UCA_V900) |
| 1861 | ✗ | return UCA900_WEIGHT(cs->uca->weights[0], /*weight_lv=*/0, 0x20); | |
| 1862 | else | ||
| 1863 | 974136 | return cs->uca->weights[0][0x20 * cs->uca->lengths[0]]; | |
| 1864 | } | ||
| 1865 | |||
| 1866 | /** | ||
| 1867 | Helper function: | ||
| 1868 | Find address of weights of the given code point. | ||
| 1869 | |||
| 1870 | @param uca Pointer to UCA data | ||
| 1871 | @param wc character Unicode code point | ||
| 1872 | |||
| 1873 | @return Weight array | ||
| 1874 | @retval pointer to weight array for the given code point, | ||
| 1875 | or nullptr if this page does not have implicit weights. | ||
| 1876 | */ | ||
| 1877 | |||
| 1878 | 1018732 | static inline uint16 *my_char_weight_addr(MY_UCA_INFO *uca, my_wc_t wc) { | |
| 1879 | uint page, ofst; | ||
| 1880 |
1/2✓ Branch 0 taken 1018732 times.
✗ Branch 1 not taken.
|
2037464 | return wc > uca->maxchar ? nullptr |
| 1881 | 1018732 | : (uca->weights[page = (wc >> 8)] | |
| 1882 |
1/2✓ Branch 0 taken 1018732 times.
✗ Branch 1 not taken.
|
1018732 | ? uca->weights[page] + (ofst = (wc & 0xFF)) * |
| 1883 | 1018732 | uca->lengths[page] | |
| 1884 | 1018732 | : nullptr); | |
| 1885 | } | ||
| 1886 | |||
| 1887 | /** | ||
| 1888 | Helper function: | ||
| 1889 | Find address of weights of the given code point, for UCA 9.0.0 format. | ||
| 1890 | |||
| 1891 | @param uca Pointer to UCA data | ||
| 1892 | @param wc character Unicode code point | ||
| 1893 | |||
| 1894 | @return Weight array | ||
| 1895 | @retval pointer to weight array for the given code point, | ||
| 1896 | or nullptr if this page does not have implicit weights. | ||
| 1897 | */ | ||
| 1898 | |||
| 1899 | 296798798 | static inline uint16 *my_char_weight_addr_900(MY_UCA_INFO *uca, my_wc_t wc) { | |
| 1900 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 296798798 times.
|
296798798 | if (wc > uca->maxchar) return nullptr; |
| 1901 | |||
| 1902 | 296798798 | uint page = wc >> 8; | |
| 1903 | 296798798 | uint ofst = wc & 0xFF; | |
| 1904 | 296798798 | uint16 *weights = uca->weights[page]; | |
| 1905 |
2/2✓ Branch 0 taken 296795184 times.
✓ Branch 1 taken 3614 times.
|
296798798 | if (weights) |
| 1906 | 296795184 | return UCA900_WEIGHT_ADDR(weights, /*level=*/0, ofst); | |
| 1907 | else | ||
| 1908 | 3614 | return nullptr; | |
| 1909 | } | ||
| 1910 | |||
| 1911 | /* | ||
| 1912 | Compares two strings according to the collation, | ||
| 1913 | ignoring trailing spaces. | ||
| 1914 | |||
| 1915 | SYNOPSIS: | ||
| 1916 | my_strnncollsp_uca() | ||
| 1917 | cs Character set information | ||
| 1918 | s First string | ||
| 1919 | slen First string length | ||
| 1920 | t Second string | ||
| 1921 | tlen Second string length | ||
| 1922 | |||
| 1923 | NOTES: | ||
| 1924 | Works exactly the same as my_strnncoll_uca(), | ||
| 1925 | but ignores trailing spaces. | ||
| 1926 | |||
| 1927 | In the while() comparison these situations are possible: | ||
| 1928 | 1. (s_res>0) and (t_res>0) and (s_res == t_res) | ||
| 1929 | Weights are the same so far, continue comparison | ||
| 1930 | 2. (s_res>0) and (t_res>0) and (s_res!=t_res) | ||
| 1931 | A difference has been found, return. | ||
| 1932 | 3. (s_res>0) and (t_res<0) | ||
| 1933 | We have reached the end of the second string, or found | ||
| 1934 | an illegal multibyte sequence in the second string. | ||
| 1935 | Compare the first string to an infinite array of | ||
| 1936 | space characters until difference is found, or until | ||
| 1937 | the end of the first string. | ||
| 1938 | 4. (s_res<0) and (t_res>0) | ||
| 1939 | We have reached the end of the first string, or found | ||
| 1940 | an illegal multibyte sequence in the first string. | ||
| 1941 | Compare the second string to an infinite array of | ||
| 1942 | space characters until difference is found or until | ||
| 1943 | the end of the second string. | ||
| 1944 | 5. (s_res<0) and (t_res<0) | ||
| 1945 | Both scanners returned -1. It means we have reached | ||
| 1946 | the end-of-string or an illegal sequence in both strings | ||
| 1947 | at the same time. Return 0, strings are equal. | ||
| 1948 | |||
| 1949 | RETURN | ||
| 1950 | Difference between two strings, according to the collation: | ||
| 1951 | 0 - means strings are equal | ||
| 1952 | negative number - means the first string is smaller | ||
| 1953 | positive number - means the first string is bigger | ||
| 1954 | */ | ||
| 1955 | |||
| 1956 | template <class Mb_wc> | ||
| 1957 | 553356 | static int my_strnncollsp_uca(const CHARSET_INFO *cs, Mb_wc mb_wc, | |
| 1958 | const uchar *s, size_t slen, const uchar *t, | ||
| 1959 | size_t tlen) { | ||
| 1960 | int s_res, t_res; | ||
| 1961 | |||
| 1962 | 553356 | uca_scanner_any<Mb_wc> sscanner(mb_wc, cs, s, slen); | |
| 1963 | 553366 | uca_scanner_any<Mb_wc> tscanner(mb_wc, cs, t, tlen); | |
| 1964 | |||
| 1965 | do { | ||
| 1966 |
2/2✓ Branch 0 taken 105108 times.
✓ Branch 1 taken 7728108 times.
|
15666432 | s_res = sscanner.next(); |
| 1967 | 15667062 | t_res = tscanner.next(); | |
| 1968 |
4/4✓ Branch 0 taken 7805855 times.
✓ Branch 1 taken 27676 times.
✓ Branch 2 taken 7556839 times.
✓ Branch 3 taken 249016 times.
|
15667062 | } while (s_res == t_res && s_res > 0); |
| 1969 | |||
| 1970 |
4/4✓ Branch 0 taken 21618 times.
✓ Branch 1 taken 255074 times.
✓ Branch 2 taken 438 times.
✓ Branch 3 taken 21180 times.
|
553384 | if (s_res > 0 && t_res < 0) { |
| 1971 | /* Calculate weight for SPACE character */ | ||
| 1972 | 876 | t_res = my_space_weight(cs); | |
| 1973 | |||
| 1974 | /* compare the first string to spaces */ | ||
| 1975 | do { | ||
| 1976 |
2/2✓ Branch 0 taken 201 times.
✓ Branch 1 taken 376 times.
|
1154 | if (s_res != t_res) return (s_res - t_res); |
| 1977 | 752 | s_res = sscanner.next(); | |
| 1978 |
2/2✓ Branch 0 taken 139 times.
✓ Branch 1 taken 237 times.
|
752 | } while (s_res > 0); |
| 1979 | 474 | return 0; | |
| 1980 | } | ||
| 1981 | |||
| 1982 |
4/4✓ Branch 0 taken 255075 times.
✓ Branch 1 taken 21179 times.
✓ Branch 2 taken 6015 times.
✓ Branch 3 taken 249060 times.
|
552508 | if (s_res < 0 && t_res > 0) { |
| 1983 | /* Calculate weight for SPACE character */ | ||
| 1984 | 12030 | s_res = my_space_weight(cs); | |
| 1985 | |||
| 1986 | /* compare the second string to spaces */ | ||
| 1987 | do { | ||
| 1988 |
2/2✓ Branch 0 taken 5577 times.
✓ Branch 1 taken 1448 times.
|
14050 | if (s_res != t_res) return (s_res - t_res); |
| 1989 | 2896 | t_res = tscanner.next(); | |
| 1990 |
2/2✓ Branch 0 taken 1010 times.
✓ Branch 1 taken 438 times.
|
2896 | } while (t_res > 0); |
| 1991 | 876 | return 0; | |
| 1992 | } | ||
| 1993 | |||
| 1994 | 540478 | return (s_res - t_res); | |
| 1995 | } | ||
| 1996 | |||
| 1997 | /* | ||
| 1998 | Calculates hash value for the given string, | ||
| 1999 | according to the collation, and ignoring trailing spaces. | ||
| 2000 | |||
| 2001 | SYNOPSIS: | ||
| 2002 | my_hash_sort_uca() | ||
| 2003 | cs Character set information | ||
| 2004 | s String | ||
| 2005 | slen String's length | ||
| 2006 | n1 First hash parameter | ||
| 2007 | n2 Second hash parameter | ||
| 2008 | |||
| 2009 | NOTES: | ||
| 2010 | Scans consequently weights and updates | ||
| 2011 | hash parameters n1 and n2. In a case insensitive collation, | ||
| 2012 | upper and lower case of the same letter will return the same | ||
| 2013 | weight sequence, and thus will produce the same hash values | ||
| 2014 | in n1 and n2. | ||
| 2015 | |||
| 2016 | RETURN | ||
| 2017 | N/A | ||
| 2018 | */ | ||
| 2019 | |||
| 2020 | template <class Mb_wc> | ||
| 2021 | 5568 | static void my_hash_sort_uca(const CHARSET_INFO *cs, Mb_wc mb_wc, | |
| 2022 | const uchar *s, size_t slen, uint64 *n1, | ||
| 2023 | uint64 *n2) { | ||
| 2024 | int s_res; | ||
| 2025 | uint64 tmp1; | ||
| 2026 | uint64 tmp2; | ||
| 2027 | |||
| 2028 |
1/2✓ Branch 0 taken 2784 times.
✗ Branch 1 not taken.
|
5568 | slen = cs->cset->lengthsp(cs, pointer_cast<const char *>(s), slen); |
| 2029 | 5568 | uca_scanner_any<Mb_wc> scanner(mb_wc, cs, s, slen); | |
| 2030 | |||
| 2031 | 5568 | tmp1 = *n1; | |
| 2032 | 5568 | tmp2 = *n2; | |
| 2033 | |||
| 2034 |
2/2✓ Branch 0 taken 126188 times.
✓ Branch 1 taken 2784 times.
|
263512 | while ((s_res = scanner.next()) > 0) { |
| 2035 | 252376 | tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res >> 8)) + (tmp1 << 8); | |
| 2036 | 252376 | tmp2 += 3; | |
| 2037 | 252376 | tmp1 ^= (((tmp1 & 63) + tmp2) * (s_res & 0xFF)) + (tmp1 << 8); | |
| 2038 | 252376 | tmp2 += 3; | |
| 2039 | } | ||
| 2040 | |||
| 2041 | 5568 | *n1 = tmp1; | |
| 2042 | 5568 | *n2 = tmp2; | |
| 2043 | 5568 | } | |
| 2044 | |||
| 2045 | /* | ||
| 2046 | For the given string creates its "binary image", suitable | ||
| 2047 | to be used in binary comparison, i.e. in memcmp(). | ||
| 2048 | |||
| 2049 | SYNOPSIS: | ||
| 2050 | my_strnxfrm_uca() | ||
| 2051 | cs Character set information | ||
| 2052 | dst Where to write the image | ||
| 2053 | dstlen Space available for the image, in bytes | ||
| 2054 | src The source string | ||
| 2055 | srclen Length of the source string, in bytes | ||
| 2056 | |||
| 2057 | NOTES: | ||
| 2058 | In a loop, scans weights from the source string and writes | ||
| 2059 | them into the binary image. In a case insensitive collation, | ||
| 2060 | upper and lower cases of the same letter will produce the | ||
| 2061 | same image subsequences. When we have reached the end-of-string | ||
| 2062 | or found an illegal multibyte sequence, the loop stops. | ||
| 2063 | |||
| 2064 | It is impossible to restore the original string using its | ||
| 2065 | binary image. | ||
| 2066 | |||
| 2067 | Binary images are used for bulk comparison purposes, | ||
| 2068 | e.g. in ORDER BY, when it is more efficient to create | ||
| 2069 | a binary image and use it instead of weight scanner | ||
| 2070 | for the original strings for every comparison. | ||
| 2071 | |||
| 2072 | RETURN | ||
| 2073 | Number of bytes that have been written into the binary image. | ||
| 2074 | */ | ||
| 2075 | |||
| 2076 | template <class Mb_wc> | ||
| 2077 | 507239098 | static size_t my_strnxfrm_uca(const CHARSET_INFO *cs, Mb_wc mb_wc, uchar *dst, | |
| 2078 | size_t dstlen, uint num_codepoints, | ||
| 2079 | const uchar *src, size_t srclen, uint flags) { | ||
| 2080 | 507239098 | uchar *d0 = dst; | |
| 2081 | 507239098 | uchar *de = dst + dstlen; | |
| 2082 | int s_res; | ||
| 2083 | 507239098 | uca_scanner_any<Mb_wc> scanner(mb_wc, cs, src, srclen); | |
| 2084 | |||
| 2085 |
6/6✓ Branch 0 taken 540461295 times.
✓ Branch 1 taken 248 times.
✓ Branch 2 taken 286841994 times.
✓ Branch 3 taken 253619301 times.
✓ Branch 4 taken 286841994 times.
✓ Branch 5 taken 253619549 times.
|
2161845676 | while (dst < de && (s_res = scanner.next()) > 0) { |
| 2086 | 573683988 | *dst++ = s_res >> 8; | |
| 2087 |
1/2✓ Branch 0 taken 286841994 times.
✗ Branch 1 not taken.
|
573683988 | if (dst < de) *dst++ = s_res & 0xFF; |
| 2088 | } | ||
| 2089 | |||
| 2090 |
2/2✓ Branch 0 taken 253619301 times.
✓ Branch 1 taken 248 times.
|
507239098 | if (dst < de) { |
| 2091 | /* | ||
| 2092 | PAD SPACE behavior. | ||
| 2093 | |||
| 2094 | We still have space left in the output buffer, which must mean | ||
| 2095 | that the scanner is at the end of the last level. Find out | ||
| 2096 | how many weights we wrote per level, and add any remaining | ||
| 2097 | spaces we need to get us up to the requested total. | ||
| 2098 | */ | ||
| 2099 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 253619301 times.
|
507238602 | assert(num_codepoints >= scanner.get_char_index()); |
| 2100 | 507238602 | num_codepoints -= scanner.get_char_index(); | |
| 2101 | |||
| 2102 |
2/2✓ Branch 0 taken 627887 times.
✓ Branch 1 taken 252991414 times.
|
507238602 | if (num_codepoints) { |
| 2103 | 1255774 | uint space_count = std::min<uint>((de - dst) / 2, num_codepoints); | |
| 2104 | 1255774 | s_res = my_space_weight(cs); | |
| 2105 |
2/2✓ Branch 0 taken 1783514 times.
✓ Branch 1 taken 627887 times.
|
4822802 | for (; space_count; space_count--) { |
| 2106 | 7134056 | dst = store16be(dst, s_res); | |
| 2107 | } | ||
| 2108 | } | ||
| 2109 | } | ||
| 2110 |
4/4✓ Branch 0 taken 340005 times.
✓ Branch 1 taken 253279544 times.
✓ Branch 2 taken 339796 times.
✓ Branch 3 taken 209 times.
|
507239098 | if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) { |
| 2111 | 679592 | s_res = my_space_weight(cs); | |
| 2112 |
2/2✓ Branch 0 taken 23797171 times.
✓ Branch 1 taken 339796 times.
|
48273934 | for (; dst < de;) { |
| 2113 | 47594342 | *dst++ = s_res >> 8; | |
| 2114 |
1/2✓ Branch 0 taken 23797171 times.
✗ Branch 1 not taken.
|
47594342 | if (dst < de) *dst++ = s_res & 0xFF; |
| 2115 | } | ||
| 2116 | } | ||
| 2117 | 507239098 | return dst - d0; | |
| 2118 | } | ||
| 2119 | |||
| 2120 | 148148069 | static int my_uca_charcmp_900(const CHARSET_INFO *cs, my_wc_t wc1, | |
| 2121 | my_wc_t wc2) { | ||
| 2122 | 148148069 | uint16 *weight1_ptr = my_char_weight_addr_900(cs->uca, wc1); /* W3-TODO */ | |
| 2123 | 148148069 | uint16 *weight2_ptr = my_char_weight_addr_900(cs->uca, wc2); | |
| 2124 | |||
| 2125 | /* Check if some of the characters does not have implicit weights */ | ||
| 2126 |
4/4✓ Branch 0 taken 148146934 times.
✓ Branch 1 taken 1135 times.
✓ Branch 2 taken 1812 times.
✓ Branch 3 taken 148145122 times.
|
148148069 | if (!weight1_ptr || !weight2_ptr) return wc1 != wc2; |
| 2127 | |||
| 2128 |
5/6✓ Branch 0 taken 148144905 times.
✓ Branch 1 taken 217 times.
✓ Branch 2 taken 148144905 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 146229511 times.
✓ Branch 5 taken 1915394 times.
|
148145122 | if (weight1_ptr[0] && weight2_ptr[0] && weight1_ptr[0] != weight2_ptr[0]) |
| 2129 | 146229511 | return 1; | |
| 2130 | |||
| 2131 | /* Thoroughly compare all weights */ | ||
| 2132 | 1915611 | size_t length1 = weight1_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS]; | |
| 2133 | 1915611 | size_t length2 = weight2_ptr[-UCA900_DISTANCE_BETWEEN_LEVELS]; | |
| 2134 | |||
| 2135 |
2/2✓ Branch 0 taken 1915708 times.
✓ Branch 1 taken 1915364 times.
|
3831072 | for (int level = 0; level < cs->levels_for_compare; ++level) { |
| 2136 | 1915708 | size_t wt_ind1 = 0; | |
| 2137 | 1915708 | size_t wt_ind2 = 0; | |
| 2138 | 1915708 | uint16 *weight1 = weight1_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS; | |
| 2139 | 1915708 | uint16 *weight2 = weight2_ptr + level * UCA900_DISTANCE_BETWEEN_LEVELS; | |
| 2140 |
4/4✓ Branch 0 taken 1915856 times.
✓ Branch 1 taken 1915385 times.
✓ Branch 2 taken 1915814 times.
✓ Branch 3 taken 42 times.
|
3831241 | while (wt_ind1 < length1 && wt_ind2 < length2) { |
| 2141 | // Zero weight is ignorable. | ||
| 2142 |
4/4✓ Branch 0 taken 1915814 times.
✓ Branch 1 taken 272 times.
✓ Branch 2 taken 272 times.
✓ Branch 3 taken 1915542 times.
|
1916086 | for (; wt_ind1 < length1 && !*weight1; wt_ind1++) |
| 2143 | 272 | weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 2144 |
2/2✓ Branch 0 taken 272 times.
✓ Branch 1 taken 1915542 times.
|
1915814 | if (wt_ind1 == length1) break; |
| 2145 |
2/4✓ Branch 0 taken 1915542 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1915542 times.
|
1915542 | for (; wt_ind2 < length2 && !*weight2; wt_ind2++) |
| 2146 | ✗ | weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 2147 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1915542 times.
|
1915542 | if (wt_ind2 == length2) break; |
| 2148 | |||
| 2149 | // Check if these two non-ignorable weights are equal. | ||
| 2150 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1915533 times.
|
1915542 | if (*weight1 != *weight2) return 1; |
| 2151 | 1915533 | wt_ind1++; | |
| 2152 | 1915533 | wt_ind2++; | |
| 2153 | 1915533 | weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 2154 | 1915533 | weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 2155 | } | ||
| 2156 | /* | ||
| 2157 | If either character is out of weights but we have equality so far, | ||
| 2158 | check if the other character has any non-ignorable weights left. | ||
| 2159 | */ | ||
| 2160 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 1915693 times.
|
1915735 | for (; wt_ind1 < length1; wt_ind1++) { |
| 2161 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 36 times.
|
42 | if (*weight1) return 1; |
| 2162 | 36 | weight1 += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 2163 | } | ||
| 2164 |
2/2✓ Branch 0 taken 305 times.
✓ Branch 1 taken 1915461 times.
|
1915766 | for (; wt_ind2 < length2; wt_ind2++) { |
| 2165 |
2/2✓ Branch 0 taken 232 times.
✓ Branch 1 taken 73 times.
|
305 | if (*weight2) return 1; |
| 2166 | 73 | weight2 += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 2167 | } | ||
| 2168 | } | ||
| 2169 | 1915364 | return 0; | |
| 2170 | } | ||
| 2171 | |||
| 2172 | /* | ||
| 2173 | This function compares if two code points are the same. | ||
| 2174 | The sign +1 or -1 does not matter. The only | ||
| 2175 | important thing is that the result is 0 or not 0. | ||
| 2176 | This fact allows us to use memcmp() safely, on both | ||
| 2177 | little-endian and big-endian machines. | ||
| 2178 | */ | ||
| 2179 | |||
| 2180 | 182086189 | static int my_uca_charcmp(const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) { | |
| 2181 |
2/2✓ Branch 0 taken 33936011 times.
✓ Branch 1 taken 148150178 times.
|
182086189 | if (wc1 == wc2) return 0; |
| 2182 | |||
| 2183 |
3/4✓ Branch 0 taken 148150178 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 148148069 times.
✓ Branch 3 taken 2109 times.
|
148150178 | if (cs->uca != nullptr && cs->uca->version == UCA_V900) |
| 2184 | 148148069 | return my_uca_charcmp_900(cs, wc1, wc2); | |
| 2185 | |||
| 2186 | size_t length1, length2; | ||
| 2187 | 2109 | uint16 *weight1 = my_char_weight_addr(cs->uca, wc1); /* W3-TODO */ | |
| 2188 | 2109 | uint16 *weight2 = my_char_weight_addr(cs->uca, wc2); | |
| 2189 | |||
| 2190 | /* Check if some of the code points does not have implicit weights */ | ||
| 2191 |
2/4✓ Branch 0 taken 2109 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2109 times.
|
2109 | if (!weight1 || !weight2) return wc1 != wc2; |
| 2192 | |||
| 2193 | /* Quickly compare first weights */ | ||
| 2194 |
2/2✓ Branch 0 taken 2073 times.
✓ Branch 1 taken 36 times.
|
2109 | if (weight1[0] != weight2[0]) return 1; |
| 2195 | |||
| 2196 | /* Thoroughly compare all weights */ | ||
| 2197 | 36 | length1 = cs->uca->lengths[wc1 >> MY_UCA_PSHIFT]; /* W3-TODO */ | |
| 2198 | 36 | length2 = cs->uca->lengths[wc2 >> MY_UCA_PSHIFT]; | |
| 2199 | |||
| 2200 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
|
36 | if (length1 > length2) |
| 2201 | ✗ | return memcmp((const void *)weight1, (const void *)weight2, length2 * 2) | |
| 2202 | ✗ | ? 1 | |
| 2203 | ✗ | : weight1[length2]; | |
| 2204 | |||
| 2205 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
|
36 | if (length1 < length2) |
| 2206 | ✗ | return memcmp((const void *)weight1, (const void *)weight2, length1 * 2) | |
| 2207 | ✗ | ? 1 | |
| 2208 | ✗ | : weight2[length1]; | |
| 2209 | |||
| 2210 | 36 | return memcmp((const void *)weight1, (const void *)weight2, length1 * 2); | |
| 2211 | } | ||
| 2212 | |||
| 2213 | /*** Compare string against string with wildcard | ||
| 2214 | ** 0 if matched | ||
| 2215 | ** -1 if not matched with wildcard | ||
| 2216 | ** 1 if matched with wildcard | ||
| 2217 | */ | ||
| 2218 | |||
| 2219 | 131829251 | static int my_wildcmp_uca_impl(const CHARSET_INFO *cs, const char *str, | |
| 2220 | const char *str_end, const char *wildstr, | ||
| 2221 | const char *wildend, int escape, int w_one, | ||
| 2222 | int w_many, int recurse_level) { | ||
| 2223 |
4/6✓ Branch 0 taken 131828825 times.
✓ Branch 1 taken 426 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 131828825 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 131829251 times.
|
131829251 | if (my_string_stack_guard && my_string_stack_guard(recurse_level)) return 1; |
| 2224 |
2/2✓ Branch 0 taken 131828585 times.
✓ Branch 1 taken 666 times.
|
131829251 | while (wildstr != wildend) { |
| 2225 | 131828585 | int result = -1; /* Not found, using wildcards */ | |
| 2226 | 131828585 | auto mb_wc = cs->cset->mb_wc; | |
| 2227 | |||
| 2228 | /* | ||
| 2229 | Compare the expression and pattern strings character-by-character until | ||
| 2230 | we find a '%' (w_many) in the pattern string. Once we do, we break out | ||
| 2231 | of the loop and try increasingly large widths for the '%' match, | ||
| 2232 | calling ourselves recursively until we find a match. (As an | ||
| 2233 | optimization, we test for the character immediately after '%' before we | ||
| 2234 | recurse.) This takes exponential time in the worst case. | ||
| 2235 | |||
| 2236 | Example: Say we are trying to match the pattern 'ab%cd' against the | ||
| 2237 | string 'ab..c.cd'. We first match the initial 'ab' against each other, | ||
| 2238 | and then see the '%' in the pattern. Since the first character after | ||
| 2239 | '%' is 'c', we skip to the first 'c' in the expression string, and try | ||
| 2240 | to match 'c.cd' against 'cd' by a recursive call. Since this failed, we | ||
| 2241 | scan for the next 'c', and try to match 'cd' against 'cd', which works. | ||
| 2242 | */ | ||
| 2243 | my_wc_t w_wc; | ||
| 2244 | while (true) { | ||
| 2245 | int mb_len; | ||
| 2246 |
1/2✓ Branch 0 taken 168104851 times.
✗ Branch 1 not taken.
|
168104851 | if ((mb_len = mb_wc(cs, &w_wc, (const uchar *)wildstr, |
| 2247 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 168104851 times.
|
168104851 | (const uchar *)wildend)) <= 0) |
| 2248 | 130221051 | return 1; | |
| 2249 | |||
| 2250 | 168104851 | wildstr += mb_len; | |
| 2251 | // If we found '%' (w_many), break out this loop. | ||
| 2252 |
2/2✓ Branch 0 taken 1607534 times.
✓ Branch 1 taken 166497317 times.
|
168104851 | if (w_wc == (my_wc_t)w_many) { |
| 2253 | 1607534 | result = 1; | |
| 2254 | 1607534 | break; | |
| 2255 | } | ||
| 2256 | |||
| 2257 | /* | ||
| 2258 | If the character we just read was an escape character, skip it and | ||
| 2259 | read the next character instead. This character is used verbatim | ||
| 2260 | without checking if it is a wildcard (% or _). However, as a | ||
| 2261 | special exception, a lone escape character at the end of a string is | ||
| 2262 | treated as itself. | ||
| 2263 | */ | ||
| 2264 | 166497317 | bool escaped = false; | |
| 2265 |
4/4✓ Branch 0 taken 7536 times.
✓ Branch 1 taken 166489781 times.
✓ Branch 2 taken 7522 times.
✓ Branch 3 taken 14 times.
|
166497317 | if (w_wc == (my_wc_t)escape && wildstr < wildend) { |
| 2266 |
1/2✓ Branch 0 taken 7522 times.
✗ Branch 1 not taken.
|
7522 | if ((mb_len = mb_wc(cs, &w_wc, (const uchar *)wildstr, |
| 2267 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7522 times.
|
7522 | (const uchar *)wildend)) <= 0) |
| 2268 | ✗ | return 1; | |
| 2269 | 7522 | wildstr += mb_len; | |
| 2270 | 7522 | escaped = true; | |
| 2271 | } | ||
| 2272 | |||
| 2273 | my_wc_t s_wc; | ||
| 2274 |
1/2✓ Branch 0 taken 166497317 times.
✗ Branch 1 not taken.
|
166497317 | if ((mb_len = mb_wc(cs, &s_wc, (const uchar *)str, |
| 2275 |
2/2✓ Branch 0 taken 17541 times.
✓ Branch 1 taken 166479776 times.
|
166497317 | (const uchar *)str_end)) <= 0) |
| 2276 | 17541 | return 1; | |
| 2277 | 166479776 | str += mb_len; | |
| 2278 | |||
| 2279 | // If we found '_' (w_one), skip one character in expression string. | ||
| 2280 |
4/4✓ Branch 0 taken 166472270 times.
✓ Branch 1 taken 7506 times.
✓ Branch 2 taken 2381496 times.
✓ Branch 3 taken 164090774 times.
|
166479776 | if (!escaped && w_wc == (my_wc_t)w_one) { |
| 2281 | 2381496 | result = 1; | |
| 2282 | } else { | ||
| 2283 |
2/2✓ Branch 0 taken 129958894 times.
✓ Branch 1 taken 34139386 times.
|
164098280 | if (my_uca_charcmp(cs, s_wc, w_wc)) return 1; |
| 2284 | } | ||
| 2285 |
2/2✓ Branch 0 taken 244616 times.
✓ Branch 1 taken 36276266 times.
|
36520882 | if (wildstr == wildend) |
| 2286 | 244616 | return (str != str_end); /* Match if both are at end */ | |
| 2287 | 36276266 | } | |
| 2288 | |||
| 2289 |
1/2✓ Branch 0 taken 1607534 times.
✗ Branch 1 not taken.
|
1607534 | if (w_wc == (my_wc_t)w_many) { |
| 2290 | // Remove any '%' and '_' following w_many in the pattern string. | ||
| 2291 | for (;;) { | ||
| 2292 |
2/2✓ Branch 0 taken 534622 times.
✓ Branch 1 taken 1086683 times.
|
1621305 | if (wildstr == wildend) { |
| 2293 | /* | ||
| 2294 | The previous w_many (%) was the last character in the pattern | ||
| 2295 | string, so we have a match no matter what the rest of the | ||
| 2296 | expression string looks like (even empty). | ||
| 2297 | */ | ||
| 2298 | 534622 | return 0; | |
| 2299 | } | ||
| 2300 | int mb_len_wild = | ||
| 2301 |
1/2✓ Branch 0 taken 1086683 times.
✗ Branch 1 not taken.
|
1086683 | mb_wc(cs, &w_wc, (const uchar *)wildstr, (const uchar *)wildend); |
| 2302 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1086683 times.
|
1086683 | if (mb_len_wild <= 0) return 1; |
| 2303 | 1086683 | wildstr += mb_len_wild; | |
| 2304 |
2/2✓ Branch 0 taken 2601 times.
✓ Branch 1 taken 1084082 times.
|
1086683 | if (w_wc == (my_wc_t)w_many) continue; |
| 2305 | |||
| 2306 |
2/2✓ Branch 0 taken 11170 times.
✓ Branch 1 taken 1072912 times.
|
1084082 | if (w_wc == (my_wc_t)w_one) { |
| 2307 | /* | ||
| 2308 | Skip one character in expression string because '_' needs to | ||
| 2309 | match one. | ||
| 2310 | */ | ||
| 2311 | my_wc_t s_wc; | ||
| 2312 | int mb_len = | ||
| 2313 |
1/2✓ Branch 0 taken 11170 times.
✗ Branch 1 not taken.
|
11170 | mb_wc(cs, &s_wc, (const uchar *)str, (const uchar *)str_end); |
| 2314 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11170 times.
|
11170 | if (mb_len <= 0) return 1; |
| 2315 | 11170 | str += mb_len; | |
| 2316 | 11170 | continue; | |
| 2317 | 11170 | } | |
| 2318 | 1072912 | break; /* Not a wild character */ | |
| 2319 | 13771 | } | |
| 2320 | |||
| 2321 | // No character in the expression string to match w_wc. | ||
| 2322 |
2/2✓ Branch 0 taken 628 times.
✓ Branch 1 taken 1072284 times.
|
1072912 | if (str == str_end) return -1; |
| 2323 | |||
| 2324 | // Skip the escape character ('\') in the pattern if needed. | ||
| 2325 |
4/4✓ Branch 0 taken 126 times.
✓ Branch 1 taken 1072158 times.
✓ Branch 2 taken 113 times.
✓ Branch 3 taken 13 times.
|
1072284 | if (w_wc == (my_wc_t)escape && wildstr < wildend) { |
| 2326 | int mb_len = | ||
| 2327 |
1/2✓ Branch 0 taken 113 times.
✗ Branch 1 not taken.
|
113 | mb_wc(cs, &w_wc, (const uchar *)wildstr, (const uchar *)wildend); |
| 2328 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 113 times.
|
113 | if (mb_len <= 0) return 1; |
| 2329 | 113 | wildstr += mb_len; | |
| 2330 | } | ||
| 2331 | |||
| 2332 | /* | ||
| 2333 | w_wc is now the character following w_many (e.g., if the pattern is | ||
| 2334 | "a%c", w_wc is 'c'). | ||
| 2335 | */ | ||
| 2336 | while (true) { | ||
| 2337 | /* | ||
| 2338 | Skip until we find a character in the expression string that is | ||
| 2339 | equal to w_wc. | ||
| 2340 | */ | ||
| 2341 | 2540992 | int mb_len = 0; | |
| 2342 |
2/2✓ Branch 0 taken 17987909 times.
✓ Branch 1 taken 828975 times.
|
18816884 | while (str != str_end) { |
| 2343 | my_wc_t s_wc; | ||
| 2344 |
1/2✓ Branch 0 taken 17987909 times.
✗ Branch 1 not taken.
|
17987909 | if ((mb_len = mb_wc(cs, &s_wc, (const uchar *)str, |
| 2345 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17987909 times.
|
17987909 | (const uchar *)str_end)) <= 0) |
| 2346 | ✗ | return 1; | |
| 2347 | |||
| 2348 |
2/2✓ Branch 0 taken 1712017 times.
✓ Branch 1 taken 16275892 times.
|
17987909 | if (!my_uca_charcmp(cs, s_wc, w_wc)) break; |
| 2349 | 16275892 | str += mb_len; | |
| 2350 | } | ||
| 2351 | // No character in the expression string is equal to w_wc. | ||
| 2352 |
2/2✓ Branch 0 taken 828975 times.
✓ Branch 1 taken 1712017 times.
|
2540992 | if (str == str_end) return -1; |
| 2353 | 1712017 | str += mb_len; | |
| 2354 | |||
| 2355 | /* | ||
| 2356 | The strings match up until the first character after w_many in the | ||
| 2357 | pattern string. For the rest part of pattern string and expression | ||
| 2358 | string, we recursively call to get wild compare result. | ||
| 2359 | Example, wildcmp(..., "abcdefg", "a%de%g", ...), we'll run again on | ||
| 2360 | wildcmp(..., "efg", "e%g", ...). | ||
| 2361 | */ | ||
| 2362 |
1/2✓ Branch 0 taken 1712017 times.
✗ Branch 1 not taken.
|
1712017 | result = my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape, |
| 2363 | w_one, w_many, recurse_level + 1); | ||
| 2364 | |||
| 2365 |
2/2✓ Branch 0 taken 243309 times.
✓ Branch 1 taken 1468708 times.
|
1712017 | if (result <= 0) return result; |
| 2366 | 1468708 | } | |
| 2367 | } | ||
| 2368 | } | ||
| 2369 |
2/2✓ Branch 0 taken 588 times.
✓ Branch 1 taken 78 times.
|
666 | return (str != str_end ? 1 : 0); |
| 2370 | } | ||
| 2371 | |||
| 2372 | 15962 | static int my_strcasecmp_uca(const CHARSET_INFO *cs, const char *s, | |
| 2373 | const char *t) { | ||
| 2374 | 15962 | const MY_UNICASE_INFO *uni_plane = cs->caseinfo; | |
| 2375 | const MY_UNICASE_CHARACTER *page; | ||
| 2376 |
4/4✓ Branch 0 taken 34282 times.
✓ Branch 1 taken 3075 times.
✓ Branch 2 taken 33823 times.
✓ Branch 3 taken 459 times.
|
37357 | while (s[0] && t[0]) { |
| 2377 | my_wc_t s_wc, t_wc; | ||
| 2378 | |||
| 2379 |
1/2✓ Branch 0 taken 33823 times.
✗ Branch 1 not taken.
|
33823 | if (static_cast<uchar>(s[0]) < 128) { |
| 2380 | 33823 | s_wc = uni_plane->page[0][static_cast<uchar>(s[0])].tolower; | |
| 2381 | 33823 | s++; | |
| 2382 | } else { | ||
| 2383 | int res; | ||
| 2384 | |||
| 2385 | ✗ | res = cs->cset->mb_wc(cs, &s_wc, pointer_cast<const uchar *>(s), | |
| 2386 | ✗ | pointer_cast<const uchar *>(s + 4)); | |
| 2387 | |||
| 2388 |
0/2✗ Branch 0 not taken.
✗ Branch 1 not taken.
|
12428 | if (res <= 0) return strcmp(s, t); |
| 2389 | ✗ | s += res; | |
| 2390 | ✗ | if (s_wc <= uni_plane->maxchar && (page = uni_plane->page[s_wc >> 8])) | |
| 2391 | ✗ | s_wc = page[s_wc & 0xFF].tolower; | |
| 2392 | } | ||
| 2393 | |||
| 2394 | /* Do the same for the second string */ | ||
| 2395 | |||
| 2396 |
1/2✓ Branch 0 taken 33823 times.
✗ Branch 1 not taken.
|
33823 | if (static_cast<uchar>(t[0]) < 128) { |
| 2397 | /* Convert single byte character into weight */ | ||
| 2398 | 33823 | t_wc = uni_plane->page[0][static_cast<uchar>(t[0])].tolower; | |
| 2399 | 33823 | t++; | |
| 2400 | } else { | ||
| 2401 | ✗ | int res = cs->cset->mb_wc(cs, &t_wc, pointer_cast<const uchar *>(t), | |
| 2402 | ✗ | pointer_cast<const uchar *>(t + 4)); | |
| 2403 | ✗ | if (res <= 0) return strcmp(s, t); | |
| 2404 | ✗ | t += res; | |
| 2405 | |||
| 2406 | ✗ | if (t_wc <= uni_plane->maxchar && (page = uni_plane->page[t_wc >> 8])) | |
| 2407 | ✗ | t_wc = page[t_wc & 0xFF].tolower; | |
| 2408 | } | ||
| 2409 | |||
| 2410 | /* Now we have two weights, let's compare them */ | ||
| 2411 |
2/2✓ Branch 0 taken 12428 times.
✓ Branch 1 taken 21395 times.
|
33823 | if (s_wc != t_wc) return static_cast<int>(s_wc) - static_cast<int>(t_wc); |
| 2412 | } | ||
| 2413 | 3534 | return static_cast<int>(static_cast<uchar>(s[0])) - | |
| 2414 | 3534 | static_cast<int>(static_cast<uchar>(t[0])); | |
| 2415 | } | ||
| 2416 | |||
| 2417 | extern "C" { | ||
| 2418 | 130117234 | static int my_wildcmp_uca(const CHARSET_INFO *cs, const char *str, | |
| 2419 | const char *str_end, const char *wildstr, | ||
| 2420 | const char *wildend, int escape, int w_one, | ||
| 2421 | int w_many) { | ||
| 2422 | 130117234 | return my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, escape, w_one, | |
| 2423 | 130117234 | w_many, 1); | |
| 2424 | } | ||
| 2425 | } // extern "C" | ||
| 2426 | |||
| 2427 | /* | ||
| 2428 | Collation language is implemented according to | ||
| 2429 | subset of ICU Collation Customization (tailorings): | ||
| 2430 | http://icu.sourceforge.net/userguide/Collate_Customization.html | ||
| 2431 | |||
| 2432 | Collation language elements: | ||
| 2433 | Delimiters: | ||
| 2434 | space - skipped | ||
| 2435 | |||
| 2436 | <char> := A-Z | a-z | \uXXXX | ||
| 2437 | |||
| 2438 | Shift command: | ||
| 2439 | <shift> := & - reset at this letter. | ||
| 2440 | |||
| 2441 | Diff command: | ||
| 2442 | <d1> := < - Identifies a primary difference. | ||
| 2443 | <d2> := << - Identifies a secondary difference. | ||
| 2444 | <d3> := <<< - Idenfifies a tertiary difference. | ||
| 2445 | |||
| 2446 | |||
| 2447 | Collation rules: | ||
| 2448 | <ruleset> := <rule> { <ruleset> } | ||
| 2449 | |||
| 2450 | <rule> := <d1> <string> | ||
| 2451 | | <d2> <string> | ||
| 2452 | | <d3> <string> | ||
| 2453 | | <shift> <char> | ||
| 2454 | |||
| 2455 | <string> := <char> [ <string> ] | ||
| 2456 | |||
| 2457 | An example, Polish collation: | ||
| 2458 | |||
| 2459 | &A < \u0105 <<< \u0104 | ||
| 2460 | &C < \u0107 <<< \u0106 | ||
| 2461 | &E < \u0119 <<< \u0118 | ||
| 2462 | &L < \u0142 <<< \u0141 | ||
| 2463 | &N < \u0144 <<< \u0143 | ||
| 2464 | &O < \u00F3 <<< \u00D3 | ||
| 2465 | &S < \u015B <<< \u015A | ||
| 2466 | &Z < \u017A <<< \u017B | ||
| 2467 | */ | ||
| 2468 | |||
| 2469 | typedef enum my_coll_lexem_num_en { | ||
| 2470 | MY_COLL_LEXEM_EOF = 0, | ||
| 2471 | MY_COLL_LEXEM_SHIFT = 1, | ||
| 2472 | MY_COLL_LEXEM_RESET = 4, | ||
| 2473 | MY_COLL_LEXEM_CHAR = 5, | ||
| 2474 | MY_COLL_LEXEM_ERROR = 6, | ||
| 2475 | MY_COLL_LEXEM_OPTION = 7, | ||
| 2476 | MY_COLL_LEXEM_EXTEND = 8, | ||
| 2477 | MY_COLL_LEXEM_CONTEXT = 9 | ||
| 2478 | } my_coll_lexem_num; | ||
| 2479 | |||
| 2480 | /** | ||
| 2481 | Convert collation customization lexem to string, | ||
| 2482 | for nice error reporting | ||
| 2483 | |||
| 2484 | @param term lexem code | ||
| 2485 | |||
| 2486 | @return lexem name | ||
| 2487 | */ | ||
| 2488 | |||
| 2489 | 1 | static const char *my_coll_lexem_num_to_str(my_coll_lexem_num term) { | |
| 2490 |
1/9✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
|
1 | switch (term) { |
| 2491 | ✗ | case MY_COLL_LEXEM_EOF: | |
| 2492 | ✗ | return "EOF"; | |
| 2493 | 1 | case MY_COLL_LEXEM_SHIFT: | |
| 2494 | 1 | return "Shift"; | |
| 2495 | ✗ | case MY_COLL_LEXEM_RESET: | |
| 2496 | ✗ | return "&"; | |
| 2497 | ✗ | case MY_COLL_LEXEM_CHAR: | |
| 2498 | ✗ | return "Character"; | |
| 2499 | ✗ | case MY_COLL_LEXEM_OPTION: | |
| 2500 | ✗ | return "Bracket option"; | |
| 2501 | ✗ | case MY_COLL_LEXEM_EXTEND: | |
| 2502 | ✗ | return "/"; | |
| 2503 | ✗ | case MY_COLL_LEXEM_CONTEXT: | |
| 2504 | ✗ | return "|"; | |
| 2505 | ✗ | case MY_COLL_LEXEM_ERROR: | |
| 2506 | ✗ | return "ERROR"; | |
| 2507 | } | ||
| 2508 | ✗ | return nullptr; | |
| 2509 | } | ||
| 2510 | |||
| 2511 | struct MY_COLL_LEXEM { | ||
| 2512 | my_coll_lexem_num term; | ||
| 2513 | const char *beg; | ||
| 2514 | const char *end; | ||
| 2515 | const char *prev; | ||
| 2516 | int diff; | ||
| 2517 | int code; | ||
| 2518 | }; | ||
| 2519 | |||
| 2520 | /* | ||
| 2521 | Initialize collation rule lexical anilizer | ||
| 2522 | |||
| 2523 | SYNOPSIS | ||
| 2524 | my_coll_lexem_init | ||
| 2525 | lexem Lex analyzer to init | ||
| 2526 | str Const string to parse | ||
| 2527 | str_end End of the string | ||
| 2528 | USAGE | ||
| 2529 | |||
| 2530 | RETURN VALUES | ||
| 2531 | N/A | ||
| 2532 | */ | ||
| 2533 | |||
| 2534 | 29009 | static void my_coll_lexem_init(MY_COLL_LEXEM *lexem, const char *str, | |
| 2535 | const char *str_end) { | ||
| 2536 | 29009 | lexem->beg = str; | |
| 2537 | 29009 | lexem->prev = str; | |
| 2538 | 29009 | lexem->end = str_end; | |
| 2539 | 29009 | lexem->diff = 0; | |
| 2540 | 29009 | lexem->code = 0; | |
| 2541 | 29009 | } | |
| 2542 | |||
| 2543 | /** | ||
| 2544 | Compare lexem to string with length | ||
| 2545 | |||
| 2546 | @param lexem lexem | ||
| 2547 | @param pattern string | ||
| 2548 | @param patternlen string length | ||
| 2549 | |||
| 2550 | @retval 0 if lexem is equal to string, non-0 otherwise. | ||
| 2551 | */ | ||
| 2552 | |||
| 2553 | 125963 | static int lex_cmp(MY_COLL_LEXEM *lexem, const char *pattern, | |
| 2554 | size_t patternlen) { | ||
| 2555 | 125963 | size_t lexemlen = lexem->beg - lexem->prev; | |
| 2556 |
2/2✓ Branch 0 taken 62805 times.
✓ Branch 1 taken 63158 times.
|
125963 | if (lexemlen < patternlen) return 1; /* Not a prefix */ |
| 2557 | 63158 | return native_strncasecmp(lexem->prev, pattern, patternlen); | |
| 2558 | } | ||
| 2559 | |||
| 2560 | /* | ||
| 2561 | Print collation customization expression parse error, with context. | ||
| 2562 | |||
| 2563 | SYNOPSIS | ||
| 2564 | my_coll_lexem_print_error | ||
| 2565 | lexem Lex analyzer to take context from | ||
| 2566 | errstr string to write error to | ||
| 2567 | errsize errstr size | ||
| 2568 | txt error message | ||
| 2569 | col_name collation name | ||
| 2570 | USAGE | ||
| 2571 | |||
| 2572 | RETURN VALUES | ||
| 2573 | N/A | ||
| 2574 | */ | ||
| 2575 | |||
| 2576 | 3 | static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem, char *errstr, | |
| 2577 | size_t errsize, const char *txt, | ||
| 2578 | const char *col_name) { | ||
| 2579 | char tail[30]; | ||
| 2580 | 3 | size_t len = lexem->end - lexem->prev; | |
| 2581 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | strmake(tail, lexem->prev, std::min(len, sizeof(tail) - 1)); |
| 2582 | 3 | errstr[errsize - 1] = '\0'; | |
| 2583 | 3 | snprintf(errstr, errsize - 1, "%s at '%s' for COLLATION : %s", | |
| 2584 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | txt[0] ? txt : "Syntax error", tail, col_name); |
| 2585 | 3 | } | |
| 2586 | |||
| 2587 | /* | ||
| 2588 | Convert a hex digit into its numeric value | ||
| 2589 | |||
| 2590 | SYNOPSIS | ||
| 2591 | ch2x | ||
| 2592 | ch hex digit to convert | ||
| 2593 | USAGE | ||
| 2594 | |||
| 2595 | RETURN VALUES | ||
| 2596 | an integer value in the range 0..15 | ||
| 2597 | -1 on error | ||
| 2598 | */ | ||
| 2599 | |||
| 2600 | 6647976 | static int ch2x(int ch) { | |
| 2601 |
4/4✓ Branch 0 taken 5881834 times.
✓ Branch 1 taken 766142 times.
✓ Branch 2 taken 3574586 times.
✓ Branch 3 taken 2307248 times.
|
6647976 | if (ch >= '0' && ch <= '9') return ch - '0'; |
| 2602 | |||
| 2603 |
4/4✓ Branch 0 taken 208941 times.
✓ Branch 1 taken 2864449 times.
✓ Branch 2 taken 4482 times.
✓ Branch 3 taken 204459 times.
|
3073390 | if (ch >= 'a' && ch <= 'f') return 10 + ch - 'a'; |
| 2604 | |||
| 2605 |
4/4✓ Branch 0 taken 1967559 times.
✓ Branch 1 taken 1101349 times.
✓ Branch 2 taken 1752102 times.
✓ Branch 3 taken 215457 times.
|
3068908 | if (ch >= 'A' && ch <= 'F') return 10 + ch - 'A'; |
| 2606 | |||
| 2607 | 1316806 | return -1; | |
| 2608 | } | ||
| 2609 | |||
| 2610 | /* | ||
| 2611 | Collation language lexical parser: | ||
| 2612 | Scans the next lexem. | ||
| 2613 | |||
| 2614 | SYNOPSIS | ||
| 2615 | my_coll_lexem_next | ||
| 2616 | lexem Lex analyzer, previously initialized by | ||
| 2617 | my_coll_lexem_init. | ||
| 2618 | USAGE | ||
| 2619 | Call this function in a loop | ||
| 2620 | |||
| 2621 | RETURN VALUES | ||
| 2622 | Lexem number: eof, option, reset, shift, extend, context, char or error. | ||
| 2623 | */ | ||
| 2624 | |||
| 2625 | 3480019 | static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem) { | |
| 2626 | const char *beg; | ||
| 2627 | my_coll_lexem_num rc; | ||
| 2628 | |||
| 2629 |
2/2✓ Branch 0 taken 5070598 times.
✓ Branch 1 taken 87022 times.
|
5157620 | for (beg = lexem->beg; beg < lexem->end; beg++) { |
| 2630 |
8/8✓ Branch 0 taken 1677601 times.
✓ Branch 1 taken 25980 times.
✓ Branch 2 taken 293472 times.
✓ Branch 3 taken 267214 times.
✓ Branch 4 taken 56538 times.
✓ Branch 5 taken 204479 times.
✓ Branch 6 taken 795021 times.
✓ Branch 7 taken 1750293 times.
|
5070598 | switch (*beg) { |
| 2631 | 1677601 | case ' ': | |
| 2632 | case '\t': | ||
| 2633 | case '\r': | ||
| 2634 | case '\n': | ||
| 2635 | 1677601 | continue; | |
| 2636 | |||
| 2637 | 25980 | case '[': /* Bracket expression, e.g. "[optimize [a-z]]" */ | |
| 2638 | { | ||
| 2639 | size_t nbrackets; /* Indicates nested recursion level */ | ||
| 2640 |
1/2✓ Branch 0 taken 235013 times.
✗ Branch 1 not taken.
|
235013 | for (beg++, nbrackets = 1; beg < lexem->end; beg++) { |
| 2641 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 235013 times.
|
235013 | if (*beg == '[') /* Enter nested bracket expression */ |
| 2642 | ✗ | nbrackets++; | |
| 2643 |
2/2✓ Branch 0 taken 25980 times.
✓ Branch 1 taken 209033 times.
|
235013 | else if (*beg == ']') { |
| 2644 |
1/2✓ Branch 0 taken 25980 times.
✗ Branch 1 not taken.
|
25980 | if (--nbrackets == 0) { |
| 2645 | 25980 | rc = MY_COLL_LEXEM_OPTION; | |
| 2646 | 25980 | beg++; | |
| 2647 | 25980 | goto ex; | |
| 2648 | } | ||
| 2649 | } | ||
| 2650 | } | ||
| 2651 | ✗ | rc = MY_COLL_LEXEM_ERROR; | |
| 2652 | ✗ | goto ex; | |
| 2653 | } | ||
| 2654 | |||
| 2655 | 293472 | case '&': | |
| 2656 | 293472 | beg++; | |
| 2657 | 293472 | rc = MY_COLL_LEXEM_RESET; | |
| 2658 | 293472 | goto ex; | |
| 2659 | |||
| 2660 | 267214 | case '=': | |
| 2661 | 267214 | beg++; | |
| 2662 | 267214 | lexem->diff = 0; | |
| 2663 | 267214 | rc = MY_COLL_LEXEM_SHIFT; | |
| 2664 | 267214 | goto ex; | |
| 2665 | |||
| 2666 | 56538 | case '/': | |
| 2667 | 56538 | beg++; | |
| 2668 | 56538 | rc = MY_COLL_LEXEM_EXTEND; | |
| 2669 | 56538 | goto ex; | |
| 2670 | |||
| 2671 | 204479 | case '|': | |
| 2672 | 204479 | beg++; | |
| 2673 | 204479 | rc = MY_COLL_LEXEM_CONTEXT; | |
| 2674 | 204479 | goto ex; | |
| 2675 | |||
| 2676 | 795021 | case '<': /* Shift: '<' or '<<' or '<<<' or '<<<<' */ | |
| 2677 | { | ||
| 2678 | /* Scan up to 3 additional '<' characters */ | ||
| 2679 | 795021 | for (beg++, lexem->diff = 1; | |
| 2680 |
4/6✓ Branch 0 taken 1683590 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 888569 times.
✓ Branch 3 taken 795021 times.
✓ Branch 4 taken 888569 times.
✗ Branch 5 not taken.
|
1683590 | (beg < lexem->end) && (*beg == '<') && (lexem->diff <= 3); |
| 2681 | 888569 | beg++, lexem->diff++) | |
| 2682 | ; | ||
| 2683 | 795021 | rc = MY_COLL_LEXEM_SHIFT; | |
| 2684 | 795021 | goto ex; | |
| 2685 | } | ||
| 2686 | 1750293 | default: | |
| 2687 | 1750293 | break; | |
| 2688 | 1677601 | } | |
| 2689 | |||
| 2690 | /* Escaped character, e.g. \u1234 */ | ||
| 2691 |
4/6✓ Branch 0 taken 1329975 times.
✓ Branch 1 taken 420318 times.
✓ Branch 2 taken 1329975 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1329975 times.
✗ Branch 5 not taken.
|
1750293 | if ((*beg == '\\') && (beg + 2 < lexem->end) && (beg[1] == 'u') && |
| 2692 |
2/2✓ Branch 0 taken 1329973 times.
✓ Branch 1 taken 2 times.
|
1329975 | my_isxdigit(&my_charset_utf8_general_ci, beg[2])) { |
| 2693 | int ch; | ||
| 2694 | |||
| 2695 | 1329973 | beg += 2; | |
| 2696 | 1329973 | lexem->code = 0; | |
| 2697 |
6/6✓ Branch 0 taken 6647976 times.
✓ Branch 1 taken 13167 times.
✓ Branch 2 taken 5331170 times.
✓ Branch 3 taken 1316806 times.
✓ Branch 4 taken 5331170 times.
✓ Branch 5 taken 1329973 times.
|
6661143 | while ((beg < lexem->end) && ((ch = ch2x(beg[0])) >= 0)) { |
| 2698 | 5331170 | lexem->code = (lexem->code << 4) + ch; | |
| 2699 | 5331170 | beg++; | |
| 2700 | } | ||
| 2701 | 1329973 | rc = MY_COLL_LEXEM_CHAR; | |
| 2702 | 1329973 | goto ex; | |
| 2703 | } | ||
| 2704 | |||
| 2705 | /* | ||
| 2706 | Unescaped single byte character: | ||
| 2707 | allow printable ASCII range except SPACE and | ||
| 2708 | special characters parsed above []<&/|= | ||
| 2709 | */ | ||
| 2710 |
3/4✓ Branch 0 taken 336208 times.
✓ Branch 1 taken 84112 times.
✓ Branch 2 taken 336208 times.
✗ Branch 3 not taken.
|
420320 | if (*beg >= 0x21 && *beg <= 0x7E) { |
| 2711 | 336208 | lexem->code = *beg++; | |
| 2712 | 336208 | rc = MY_COLL_LEXEM_CHAR; | |
| 2713 | 336208 | goto ex; | |
| 2714 | } | ||
| 2715 | |||
| 2716 |
1/2✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
|
84112 | if (((uchar)*beg) > 0x7F) /* Unescaped multibyte character */ |
| 2717 | { | ||
| 2718 | 84112 | CHARSET_INFO *cs = &my_charset_utf8_general_ci; | |
| 2719 | my_wc_t wc; | ||
| 2720 |
1/2✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
|
84112 | int nbytes = cs->cset->mb_wc(cs, &wc, pointer_cast<const uchar *>(beg), |
| 2721 | 84112 | pointer_cast<const uchar *>(lexem->end)); | |
| 2722 |
1/2✓ Branch 0 taken 84112 times.
✗ Branch 1 not taken.
|
84112 | if (nbytes > 0) { |
| 2723 | 84112 | rc = MY_COLL_LEXEM_CHAR; | |
| 2724 | 84112 | beg += nbytes; | |
| 2725 | 84112 | lexem->code = (int)wc; | |
| 2726 | 84112 | goto ex; | |
| 2727 | } | ||
| 2728 | } | ||
| 2729 | |||
| 2730 | ✗ | rc = MY_COLL_LEXEM_ERROR; | |
| 2731 | ✗ | goto ex; | |
| 2732 | } | ||
| 2733 | 87022 | rc = MY_COLL_LEXEM_EOF; | |
| 2734 | |||
| 2735 | 3480019 | ex: | |
| 2736 | 3480019 | lexem->prev = lexem->beg; | |
| 2737 | 3480019 | lexem->beg = beg; | |
| 2738 | 3480019 | lexem->term = rc; | |
| 2739 | 3480019 | return rc; | |
| 2740 | } | ||
| 2741 | |||
| 2742 | /* | ||
| 2743 | Collation rule item | ||
| 2744 | */ | ||
| 2745 | |||
| 2746 | #define MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */ | ||
| 2747 | |||
| 2748 | struct MY_COLL_RULE { | ||
| 2749 | my_wc_t base[MY_UCA_MAX_EXPANSION]; /* Base character */ | ||
| 2750 | my_wc_t curr[MY_UCA_MAX_CONTRACTION]; /* Current character */ | ||
| 2751 | int diff[4]; /* Primary, Secondary, Tertiary, Quaternary difference */ | ||
| 2752 | size_t before_level; /* "reset before" indicator */ | ||
| 2753 | bool with_context; | ||
| 2754 | }; | ||
| 2755 | |||
| 2756 | /** | ||
| 2757 | Return length of the "reset" string of a rule. | ||
| 2758 | |||
| 2759 | @param r Collation customization rule | ||
| 2760 | |||
| 2761 | @return Length of r->base | ||
| 2762 | */ | ||
| 2763 | |||
| 2764 | 1250794 | static inline size_t my_coll_rule_reset_length(MY_COLL_RULE *r) { | |
| 2765 | 1250794 | return my_wstrnlen(r->base, MY_UCA_MAX_EXPANSION); | |
| 2766 | } | ||
| 2767 | |||
| 2768 | /** | ||
| 2769 | Return length of the "shift" string of a rule. | ||
| 2770 | |||
| 2771 | @param r Collation customization rule | ||
| 2772 | |||
| 2773 | @return Length of r->curr | ||
| 2774 | */ | ||
| 2775 | |||
| 2776 | 1250794 | static inline size_t my_coll_rule_shift_length(MY_COLL_RULE *r) { | |
| 2777 | 1250794 | return my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION); | |
| 2778 | } | ||
| 2779 | |||
| 2780 | /** | ||
| 2781 | Append new character to the end of a 0-terminated wide string. | ||
| 2782 | |||
| 2783 | @param wc Wide string | ||
| 2784 | @param limit Maximum possible result length | ||
| 2785 | @param code Character to add | ||
| 2786 | |||
| 2787 | @return 1 if character was added, 0 if string was too long | ||
| 2788 | */ | ||
| 2789 | |||
| 2790 | 1750409 | static int my_coll_rule_expand(my_wc_t *wc, size_t limit, my_wc_t code) { | |
| 2791 | size_t i; | ||
| 2792 |
1/2✓ Branch 0 taken 1957313 times.
✗ Branch 1 not taken.
|
1957313 | for (i = 0; i < limit; i++) { |
| 2793 |
2/2✓ Branch 0 taken 1750409 times.
✓ Branch 1 taken 206904 times.
|
1957313 | if (wc[i] == 0) { |
| 2794 | 1750409 | wc[i] = code; | |
| 2795 | 1750409 | return 1; | |
| 2796 | } | ||
| 2797 | } | ||
| 2798 | ✗ | return 0; | |
| 2799 | } | ||
| 2800 | |||
| 2801 | /** | ||
| 2802 | Initialize collation customization rule | ||
| 2803 | |||
| 2804 | @param r Rule | ||
| 2805 | */ | ||
| 2806 | |||
| 2807 | 293472 | static void my_coll_rule_reset(MY_COLL_RULE *r) { memset(r, 0, sizeof(*r)); } | |
| 2808 | |||
| 2809 | /* | ||
| 2810 | Shift methods: | ||
| 2811 | Simple: "&B < C" : weight('C') = weight('B') + 1 | ||
| 2812 | Expand: weight('C') = { weight('B'), weight(last_non_ignorable) + 1 } | ||
| 2813 | */ | ||
| 2814 | typedef enum { | ||
| 2815 | my_shift_method_simple = 0, | ||
| 2816 | my_shift_method_expand | ||
| 2817 | } my_coll_shift_method; | ||
| 2818 | |||
| 2819 | struct MY_COLL_RULES { | ||
| 2820 | MY_UCA_INFO *uca; /* Unicode weight data */ | ||
| 2821 | size_t nrules; /* Number of rules in the rule array */ | ||
| 2822 | size_t mrules; /* Number of allocated rules */ | ||
| 2823 | MY_COLL_RULE *rule; /* Rule array */ | ||
| 2824 | MY_CHARSET_LOADER *loader; | ||
| 2825 | my_coll_shift_method shift_after_method; | ||
| 2826 | }; | ||
| 2827 | |||
| 2828 | /** | ||
| 2829 | Realloc rule array to a new size. | ||
| 2830 | Reallocate memory for 128 additional rules at once, | ||
| 2831 | to reduce the number of reallocs, which is important | ||
| 2832 | for long tailorings (e.g. for East Asian collations). | ||
| 2833 | |||
| 2834 | @param rules Rule container | ||
| 2835 | @param n new number of rules | ||
| 2836 | |||
| 2837 | @return 0 on success, -1 on error. | ||
| 2838 | */ | ||
| 2839 | |||
| 2840 | 1250794 | static int my_coll_rules_realloc(MY_COLL_RULES *rules, size_t n) { | |
| 2841 |
3/4✓ Branch 0 taken 30883 times.
✓ Branch 1 taken 1219911 times.
✓ Branch 2 taken 1250794 times.
✗ Branch 3 not taken.
|
1281677 | if (rules->nrules < rules->mrules || |
| 2842 |
1/2✓ Branch 0 taken 30883 times.
✗ Branch 1 not taken.
|
30883 | (rules->rule = static_cast<MY_COLL_RULE *>(rules->loader->mem_realloc( |
| 2843 | 30883 | rules->rule, sizeof(MY_COLL_RULE) * (rules->mrules = n + 128))))) | |
| 2844 | 1250794 | return 0; | |
| 2845 | ✗ | return -1; | |
| 2846 | } | ||
| 2847 | |||
| 2848 | /** | ||
| 2849 | Append one new rule to a rule array | ||
| 2850 | |||
| 2851 | @param rules Rule container | ||
| 2852 | @param rule New rule to add | ||
| 2853 | |||
| 2854 | @return 0 on success, -1 on error. | ||
| 2855 | */ | ||
| 2856 | |||
| 2857 | 1250794 | static int my_coll_rules_add(MY_COLL_RULES *rules, MY_COLL_RULE *rule) { | |
| 2858 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1250794 times.
|
1250794 | if (my_coll_rules_realloc(rules, rules->nrules + 1)) return -1; |
| 2859 | 1250794 | rules->rule[rules->nrules++] = rule[0]; | |
| 2860 | 1250794 | return 0; | |
| 2861 | } | ||
| 2862 | |||
| 2863 | /** | ||
| 2864 | Apply difference at level | ||
| 2865 | |||
| 2866 | @param r Rule | ||
| 2867 | @param level Level (0,1,2,3,4) | ||
| 2868 | */ | ||
| 2869 | |||
| 2870 | 1062235 | static void my_coll_rule_shift_at_level(MY_COLL_RULE *r, int level) { | |
| 2871 |
5/6✓ Branch 0 taken 37548 times.
✓ Branch 1 taken 341219 times.
✓ Branch 2 taken 93487 times.
✓ Branch 3 taken 322767 times.
✓ Branch 4 taken 267214 times.
✗ Branch 5 not taken.
|
1062235 | switch (level) { |
| 2872 | 37548 | case 4: /* Quaternary difference */ | |
| 2873 | 37548 | r->diff[3]++; | |
| 2874 | 37548 | break; | |
| 2875 | 341219 | case 3: /* Tertiary difference */ | |
| 2876 | 341219 | r->diff[2]++; | |
| 2877 | 341219 | r->diff[3] = 0; | |
| 2878 | 341219 | break; | |
| 2879 | 93487 | case 2: /* Secondary difference */ | |
| 2880 | 93487 | r->diff[1]++; | |
| 2881 | 93487 | r->diff[2] = r->diff[3] = 0; | |
| 2882 | 93487 | break; | |
| 2883 | 322767 | case 1: /* Primary difference */ | |
| 2884 | 322767 | r->diff[0]++; | |
| 2885 | 322767 | r->diff[1] = r->diff[2] = r->diff[3] = 0; | |
| 2886 | 322767 | break; | |
| 2887 | 267214 | case 0: | |
| 2888 | /* Do nothing for '=': use the previous offsets for all levels */ | ||
| 2889 | 267214 | break; | |
| 2890 | ✗ | default: | |
| 2891 | ✗ | assert(0); | |
| 2892 | } | ||
| 2893 | 1062235 | } | |
| 2894 | |||
| 2895 | struct MY_COLL_RULE_PARSER { | ||
| 2896 | MY_COLL_LEXEM tok[2]; /* Current token and next token for look-ahead */ | ||
| 2897 | MY_COLL_RULE rule; /* Currently parsed rule */ | ||
| 2898 | MY_COLL_RULES *rules; /* Rule list pointer */ | ||
| 2899 | char errstr[128]; /* Error message */ | ||
| 2900 | }; | ||
| 2901 | |||
| 2902 | /** | ||
| 2903 | Current parser token | ||
| 2904 | |||
| 2905 | @param p Collation customization parser | ||
| 2906 | |||
| 2907 | @return Pointer to the current token | ||
| 2908 | */ | ||
| 2909 | |||
| 2910 | 16265125 | static MY_COLL_LEXEM *my_coll_parser_curr(MY_COLL_RULE_PARSER *p) { | |
| 2911 | 16265125 | return &p->tok[0]; | |
| 2912 | } | ||
| 2913 | |||
| 2914 | /** | ||
| 2915 | Next parser token, to look ahead. | ||
| 2916 | |||
| 2917 | @param p Collation customization parser | ||
| 2918 | |||
| 2919 | @return Pointer to the next token | ||
| 2920 | */ | ||
| 2921 | |||
| 2922 | 6902020 | static MY_COLL_LEXEM *my_coll_parser_next(MY_COLL_RULE_PARSER *p) { | |
| 2923 | 6902020 | return &p->tok[1]; | |
| 2924 | } | ||
| 2925 | |||
| 2926 | /** | ||
| 2927 | Scan one token from the input stream | ||
| 2928 | |||
| 2929 | @param p Collation customization parser | ||
| 2930 | |||
| 2931 | @return Always 1, for convenience, so it is easier to use in logical expressions. | ||
| 2932 | */ | ||
| 2933 | 3422001 | static int my_coll_parser_scan(MY_COLL_RULE_PARSER *p) { | |
| 2934 | 3422001 | my_coll_parser_curr(p)[0] = my_coll_parser_next(p)[0]; | |
| 2935 | 3422001 | my_coll_lexem_next(my_coll_parser_next(p)); | |
| 2936 | 3422001 | return 1; | |
| 2937 | } | ||
| 2938 | |||
| 2939 | /** | ||
| 2940 | Initialize collation customization parser | ||
| 2941 | |||
| 2942 | @param p Collation customization parser | ||
| 2943 | @param rules Where to store rules | ||
| 2944 | @param str Beginning of a collation customization string | ||
| 2945 | @param str_end End of the collation customization string | ||
| 2946 | */ | ||
| 2947 | |||
| 2948 | 29009 | static void my_coll_parser_init(MY_COLL_RULE_PARSER *p, MY_COLL_RULES *rules, | |
| 2949 | const char *str, const char *str_end) { | ||
| 2950 | /* | ||
| 2951 | Initialize parser to the input buffer and scan two tokens, | ||
| 2952 | to make the current token and the next token known. | ||
| 2953 | */ | ||
| 2954 | 29009 | memset(p, 0, sizeof(*p)); | |
| 2955 | 29009 | p->rules = rules; | |
| 2956 | 29009 | p->errstr[0] = '\0'; | |
| 2957 | 29009 | my_coll_lexem_init(my_coll_parser_curr(p), str, str_end); | |
| 2958 | 29009 | my_coll_lexem_next(my_coll_parser_curr(p)); | |
| 2959 | 29009 | my_coll_parser_next(p)[0] = my_coll_parser_curr(p)[0]; | |
| 2960 | 29009 | my_coll_lexem_next(my_coll_parser_next(p)); | |
| 2961 | 29009 | } | |
| 2962 | |||
| 2963 | /** | ||
| 2964 | Record an error message in the parser when an unexpected token is found | ||
| 2965 | |||
| 2966 | @param p Collation customization parser | ||
| 2967 | @param term Which lexem was expected | ||
| 2968 | |||
| 2969 | @return 0, to use in "return" and boolean expressions. | ||
| 2970 | */ | ||
| 2971 | |||
| 2972 | 1 | static int my_coll_parser_expected_error(MY_COLL_RULE_PARSER *p, | |
| 2973 | my_coll_lexem_num term) { | ||
| 2974 | 1 | snprintf(p->errstr, sizeof(p->errstr), "%s expected", | |
| 2975 | my_coll_lexem_num_to_str(term)); | ||
| 2976 | 1 | return 0; | |
| 2977 | } | ||
| 2978 | |||
| 2979 | /** | ||
| 2980 | Display error when a too long character sequence is met | ||
| 2981 | |||
| 2982 | @param p Collation customization parser | ||
| 2983 | @param name Which kind of sequence: contraction, expansion, etc. | ||
| 2984 | |||
| 2985 | @return 0, to use in "return" and boolean expressions. | ||
| 2986 | */ | ||
| 2987 | |||
| 2988 | ✗ | static int my_coll_parser_too_long_error(MY_COLL_RULE_PARSER *p, | |
| 2989 | const char *name) { | ||
| 2990 | ✗ | snprintf(p->errstr, sizeof(p->errstr), "%s is too long", name); | |
| 2991 | ✗ | return 0; | |
| 2992 | } | ||
| 2993 | |||
| 2994 | /** | ||
| 2995 | Scan the given lexem from input stream, or display "expected" error. | ||
| 2996 | |||
| 2997 | @param p Collation customization parser | ||
| 2998 | @param term Which lexem is expected. | ||
| 2999 | |||
| 3000 | @retval 0 if the required term was not found. | ||
| 3001 | @retval 1 if the required term was found. | ||
| 3002 | */ | ||
| 3003 | 1939132 | static int my_coll_parser_scan_term(MY_COLL_RULE_PARSER *p, | |
| 3004 | my_coll_lexem_num term) { | ||
| 3005 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1939132 times.
|
1939132 | if (my_coll_parser_curr(p)->term != term) |
| 3006 | ✗ | return my_coll_parser_expected_error(p, term); | |
| 3007 | 1939132 | return my_coll_parser_scan(p); | |
| 3008 | } | ||
| 3009 | |||
| 3010 | /* | ||
| 3011 | In the following code we have a few functions to parse | ||
| 3012 | various collation customization non-terminal symbols. | ||
| 3013 | Unlike our usual coding convention, they return | ||
| 3014 | - 0 on "error" (when the rule was not scanned) and | ||
| 3015 | - 1 on "success"(when the rule was scanned). | ||
| 3016 | This is done intentionally to make body of the functions look easier | ||
| 3017 | and repeat the grammar of the rules in straightforward manner. | ||
| 3018 | For example: | ||
| 3019 | |||
| 3020 | // <x> ::= <y> | <z> | ||
| 3021 | int parse_x() { return parse_y() || parse_z(); } | ||
| 3022 | |||
| 3023 | // <x> ::= <y> <z> | ||
| 3024 | int parse_x() { return parse_y() && parse_z(); } | ||
| 3025 | |||
| 3026 | Using 1 on "not found" and 0 on "found" in the parser code would | ||
| 3027 | make the code more error prone and harder to read because | ||
| 3028 | of having to use inverse boolean logic. | ||
| 3029 | */ | ||
| 3030 | |||
| 3031 | /** | ||
| 3032 | Scan a collation setting in brackets, for example UCA version. | ||
| 3033 | |||
| 3034 | @param p Collation customization parser | ||
| 3035 | |||
| 3036 | @retval 0 if setting was not scanned. | ||
| 3037 | @retval 1 if setting was scanned. | ||
| 3038 | */ | ||
| 3039 | |||
| 3040 | 11 | static int my_coll_parser_scan_setting(MY_COLL_RULE_PARSER *p) { | |
| 3041 | 11 | MY_COLL_RULES *rules = p->rules; | |
| 3042 | 11 | MY_COLL_LEXEM *lexem = my_coll_parser_curr(p); | |
| 3043 | |||
| 3044 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 6 times.
|
11 | if (!lex_cmp(lexem, STRING_WITH_LEN("[version 4.0.0]"))) { |
| 3045 | 5 | rules->uca = &my_uca_v400; | |
| 3046 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | } else if (!lex_cmp(lexem, STRING_WITH_LEN("[version 5.2.0]"))) { |
| 3047 | 2 | rules->uca = &my_uca_v520; | |
| 3048 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method expand]"))) { |
| 3049 | 2 | rules->shift_after_method = my_shift_method_expand; | |
| 3050 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | } else if (!lex_cmp(lexem, STRING_WITH_LEN("[shift-after-method simple]"))) { |
| 3051 | ✗ | rules->shift_after_method = my_shift_method_simple; | |
| 3052 | } else { | ||
| 3053 | 2 | return 0; | |
| 3054 | } | ||
| 3055 | 9 | return my_coll_parser_scan(p); | |
| 3056 | } | ||
| 3057 | |||
| 3058 | /** | ||
| 3059 | Scan multiple collation settings | ||
| 3060 | |||
| 3061 | @param p Collation customization parser | ||
| 3062 | |||
| 3063 | @retval 0 if no settings were scanned. | ||
| 3064 | @retval 1 if one or more settings were scanned. | ||
| 3065 | */ | ||
| 3066 | |||
| 3067 | 29009 | static int my_coll_parser_scan_settings(MY_COLL_RULE_PARSER *p) { | |
| 3068 | /* Scan collation setting or special purpose command */ | ||
| 3069 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 29007 times.
|
29018 | while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) { |
| 3070 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 9 times.
|
11 | if (!my_coll_parser_scan_setting(p)) return 0; |
| 3071 | } | ||
| 3072 | 29007 | return 1; | |
| 3073 | } | ||
| 3074 | |||
| 3075 | /** | ||
| 3076 | Scan [before xxx] reset option | ||
| 3077 | |||
| 3078 | @param p Collation customization parser | ||
| 3079 | |||
| 3080 | @retval 0 if reset option was not scanned. | ||
| 3081 | @retval 1 if reset option was scanned. | ||
| 3082 | */ | ||
| 3083 | |||
| 3084 | 25951 | static int my_coll_parser_scan_reset_before(MY_COLL_RULE_PARSER *p) { | |
| 3085 | 25951 | MY_COLL_LEXEM *lexem = my_coll_parser_curr(p); | |
| 3086 |
6/6✓ Branch 0 taken 25907 times.
✓ Branch 1 taken 44 times.
✓ Branch 2 taken 6958 times.
✓ Branch 3 taken 18949 times.
✓ Branch 4 taken 7002 times.
✓ Branch 5 taken 18949 times.
|
51858 | if (!lex_cmp(lexem, STRING_WITH_LEN("[before primary]")) || |
| 3087 | 25907 | !lex_cmp(lexem, STRING_WITH_LEN("[before 1]"))) { | |
| 3088 | 7002 | p->rule.before_level = 1; | |
| 3089 |
6/6✓ Branch 0 taken 18945 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1120 times.
✓ Branch 3 taken 17825 times.
✓ Branch 4 taken 1124 times.
✓ Branch 5 taken 17825 times.
|
37894 | } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before secondary]")) || |
| 3090 | 18945 | !lex_cmp(lexem, STRING_WITH_LEN("[before 2]"))) { | |
| 3091 | 1124 | p->rule.before_level = 2; | |
| 3092 |
6/6✓ Branch 0 taken 17821 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 17765 times.
✓ Branch 3 taken 56 times.
✓ Branch 4 taken 17769 times.
✓ Branch 5 taken 56 times.
|
35646 | } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before tertiary]")) || |
| 3093 | 17821 | !lex_cmp(lexem, STRING_WITH_LEN("[before 3]"))) { | |
| 3094 | 17769 | p->rule.before_level = 3; | |
| 3095 |
5/6✓ Branch 0 taken 52 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 52 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 52 times.
|
108 | } else if (!lex_cmp(lexem, STRING_WITH_LEN("[before quaternary]")) || |
| 3096 | 52 | !lex_cmp(lexem, STRING_WITH_LEN("[before 4]"))) { | |
| 3097 | 4 | p->rule.before_level = 4; | |
| 3098 | } else { | ||
| 3099 | 52 | p->rule.before_level = 0; | |
| 3100 | 52 | return 0; /* Don't scan the next character */ | |
| 3101 | } | ||
| 3102 | 25899 | return my_coll_parser_scan(p); | |
| 3103 | } | ||
| 3104 | |||
| 3105 | /** | ||
| 3106 | Scan logical position and add to the wide string. | ||
| 3107 | |||
| 3108 | @param p Collation customization parser | ||
| 3109 | @param pwc Wide string to add code to | ||
| 3110 | @param limit The result string cannot be longer than 'limit' characters | ||
| 3111 | |||
| 3112 | @retval 0 if logical position was not scanned. | ||
| 3113 | @retval 1 if logical position was scanned. | ||
| 3114 | */ | ||
| 3115 | |||
| 3116 | 70 | static int my_coll_parser_scan_logical_position(MY_COLL_RULE_PARSER *p, | |
| 3117 | my_wc_t *pwc, size_t limit) { | ||
| 3118 | 70 | MY_COLL_RULES *rules = p->rules; | |
| 3119 | 70 | MY_COLL_LEXEM *lexem = my_coll_parser_curr(p); | |
| 3120 | |||
| 3121 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 62 times.
|
70 | if (!lex_cmp(lexem, STRING_WITH_LEN("[first non-ignorable]"))) |
| 3122 | 8 | lexem->code = rules->uca->first_non_ignorable; | |
| 3123 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 50 times.
|
62 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[last non-ignorable]"))) |
| 3124 | 12 | lexem->code = rules->uca->last_non_ignorable; | |
| 3125 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 46 times.
|
50 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[first primary ignorable]"))) |
| 3126 | 4 | lexem->code = rules->uca->first_primary_ignorable; | |
| 3127 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 42 times.
|
46 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[last primary ignorable]"))) |
| 3128 | 4 | lexem->code = rules->uca->last_primary_ignorable; | |
| 3129 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 36 times.
|
42 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[first secondary ignorable]"))) |
| 3130 | 6 | lexem->code = rules->uca->first_secondary_ignorable; | |
| 3131 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 32 times.
|
36 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[last secondary ignorable]"))) |
| 3132 | 4 | lexem->code = rules->uca->last_secondary_ignorable; | |
| 3133 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 28 times.
|
32 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[first tertiary ignorable]"))) |
| 3134 | 4 | lexem->code = rules->uca->first_tertiary_ignorable; | |
| 3135 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 24 times.
|
28 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[last tertiary ignorable]"))) |
| 3136 | 4 | lexem->code = rules->uca->last_tertiary_ignorable; | |
| 3137 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 20 times.
|
24 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[first trailing]"))) |
| 3138 | 4 | lexem->code = rules->uca->first_trailing; | |
| 3139 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 16 times.
|
20 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[last trailing]"))) |
| 3140 | 4 | lexem->code = rules->uca->last_trailing; | |
| 3141 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[first variable]"))) |
| 3142 | 8 | lexem->code = rules->uca->first_variable; | |
| 3143 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | else if (!lex_cmp(lexem, STRING_WITH_LEN("[last variable]"))) |
| 3144 | 8 | lexem->code = rules->uca->last_variable; | |
| 3145 | else | ||
| 3146 | ✗ | return 0; /* Don't scan the next token */ | |
| 3147 | |||
| 3148 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (!my_coll_rule_expand(pwc, limit, lexem->code)) { |
| 3149 | /* | ||
| 3150 | Logical position can not be in a contraction, | ||
| 3151 | so the above call should never fail. | ||
| 3152 | Let's assert in debug version and print | ||
| 3153 | a nice error message in production version. | ||
| 3154 | */ | ||
| 3155 | ✗ | assert(0); | |
| 3156 | return my_coll_parser_too_long_error(p, "Logical position"); | ||
| 3157 | } | ||
| 3158 | 70 | return my_coll_parser_scan(p); | |
| 3159 | } | ||
| 3160 | |||
| 3161 | /** | ||
| 3162 | Scan character list | ||
| 3163 | |||
| 3164 | @<character list@> ::= CHAR [ CHAR... ] | ||
| 3165 | |||
| 3166 | @param p Collation customization parser | ||
| 3167 | @param pwc Character string to add code to | ||
| 3168 | @param limit The result string cannot be longer than 'limit' characters | ||
| 3169 | @param name E.g. "contraction", "expansion" | ||
| 3170 | |||
| 3171 | @retval 0 if character sequence was not scanned. | ||
| 3172 | @retval 1 if character sequence was scanned. | ||
| 3173 | */ | ||
| 3174 | |||
| 3175 | 1616654 | static int my_coll_parser_scan_character_list(MY_COLL_RULE_PARSER *p, | |
| 3176 | my_wc_t *pwc, size_t limit, | ||
| 3177 | const char *name) { | ||
| 3178 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1616654 times.
|
1616654 | if (my_coll_parser_curr(p)->term != MY_COLL_LEXEM_CHAR) |
| 3179 | ✗ | return my_coll_parser_expected_error(p, MY_COLL_LEXEM_CHAR); | |
| 3180 | |||
| 3181 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1616654 times.
|
1616654 | if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code)) |
| 3182 | ✗ | return my_coll_parser_too_long_error(p, name); | |
| 3183 | |||
| 3184 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1616654 times.
|
1616654 | if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_CHAR)) return 0; |
| 3185 | |||
| 3186 |
2/2✓ Branch 0 taken 133639 times.
✓ Branch 1 taken 1616654 times.
|
1750293 | while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CHAR) { |
| 3187 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 133639 times.
|
133639 | if (!my_coll_rule_expand(pwc, limit, my_coll_parser_curr(p)->code)) |
| 3188 | ✗ | return my_coll_parser_too_long_error(p, name); | |
| 3189 | 133639 | my_coll_parser_scan(p); | |
| 3190 | } | ||
| 3191 | 1616654 | return 1; | |
| 3192 | } | ||
| 3193 | |||
| 3194 | /** | ||
| 3195 | Scan reset sequence | ||
| 3196 | |||
| 3197 | @<reset sequence@> ::= | ||
| 3198 | [ @<reset before option@> ] @<character list@> | ||
| 3199 | | [ @<reset before option@> ] @<logical reset position@> | ||
| 3200 | |||
| 3201 | @param p Collation customization parser | ||
| 3202 | |||
| 3203 | @retval 0 if reset sequence was not scanned. | ||
| 3204 | @retval 1 if reset sequence was scanned. | ||
| 3205 | */ | ||
| 3206 | |||
| 3207 | 293472 | static int my_coll_parser_scan_reset_sequence(MY_COLL_RULE_PARSER *p) { | |
| 3208 | 293472 | my_coll_rule_reset(&p->rule); | |
| 3209 | |||
| 3210 | /* Scan "[before x]" option, if exists */ | ||
| 3211 |
2/2✓ Branch 0 taken 25951 times.
✓ Branch 1 taken 267521 times.
|
293472 | if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) |
| 3212 | 25951 | my_coll_parser_scan_reset_before(p); | |
| 3213 | |||
| 3214 | /* Try logical reset position */ | ||
| 3215 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 293402 times.
|
293472 | if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION) { |
| 3216 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (!my_coll_parser_scan_logical_position(p, p->rule.base, 1)) return 0; |
| 3217 | } else { | ||
| 3218 | /* Scan single reset character or expansion */ | ||
| 3219 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 293402 times.
|
293402 | if (!my_coll_parser_scan_character_list(p, p->rule.base, |
| 3220 | MY_UCA_MAX_EXPANSION, "Expansion")) | ||
| 3221 | ✗ | return 0; | |
| 3222 | } | ||
| 3223 | |||
| 3224 |
2/2✓ Branch 0 taken 293468 times.
✓ Branch 1 taken 4 times.
|
293472 | if ((p->rules->shift_after_method == my_shift_method_expand || |
| 3225 |
2/2✓ Branch 0 taken 7000 times.
✓ Branch 1 taken 286468 times.
|
293468 | p->rule.before_level == 1) && |
| 3226 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 6958 times.
|
7004 | p->rules->uca->version < UCA_V900) /* Apply "before primary" option */ |
| 3227 | { | ||
| 3228 | /* | ||
| 3229 | Suppose we have this rule: &B[before primary] < C | ||
| 3230 | i.e. we need to put C before B, but after A, so | ||
| 3231 | the result order is: A < C < B. | ||
| 3232 | |||
| 3233 | Let primary weight of B be [BBBB]. | ||
| 3234 | |||
| 3235 | We cannot just use [BBBB-1] as weight for C: | ||
| 3236 | DUCET does not have enough unused weights between any two characters, | ||
| 3237 | so using [BBBB-1] will likely make C equal to the previous character, | ||
| 3238 | which is A, so we'll get this order instead of the desired: A = C < B. | ||
| 3239 | |||
| 3240 | To guarantee that C is sorted after A, we'll use expansion | ||
| 3241 | with a kind of "biggest possible character". | ||
| 3242 | As "biggest possible character" we'll use "last_non_ignorable": | ||
| 3243 | |||
| 3244 | We'll compose weight for C as: [BBBB-1][MMMM+1] | ||
| 3245 | where [MMMM] is weight for "last_non_ignorable". | ||
| 3246 | |||
| 3247 | We also do the same trick for "reset after" if the collation | ||
| 3248 | option says so. E.g. for the rules "&B < C", weight for | ||
| 3249 | C will be calculated as: [BBBB][MMMM+1] | ||
| 3250 | |||
| 3251 | At this point we only need to store codepoints | ||
| 3252 | 'B' and 'last_non_ignorable'. Actual weights for 'C' | ||
| 3253 | will be calculated according to the above formula later, | ||
| 3254 | in create_tailoring(). | ||
| 3255 | */ | ||
| 3256 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 46 times.
|
46 | if (!my_coll_rule_expand(p->rule.base, MY_UCA_MAX_EXPANSION, |
| 3257 | 46 | p->rules->uca->last_non_ignorable)) | |
| 3258 | ✗ | return my_coll_parser_too_long_error(p, "Expansion"); | |
| 3259 | } | ||
| 3260 | 293472 | return 1; | |
| 3261 | } | ||
| 3262 | |||
| 3263 | /** | ||
| 3264 | Scan shift sequence | ||
| 3265 | |||
| 3266 | @<shift sequence@> ::= | ||
| 3267 | @<character list@> [ / @<character list@> ] | ||
| 3268 | | @<character list@> [ | @<character list@> ] | ||
| 3269 | |||
| 3270 | @param p Collation customization parser | ||
| 3271 | |||
| 3272 | @retval 0 if shift sequence was not scanned. | ||
| 3273 | @retval 1 if shift sequence was scanned. | ||
| 3274 | */ | ||
| 3275 | |||
| 3276 | 1062235 | static int my_coll_parser_scan_shift_sequence(MY_COLL_RULE_PARSER *p) { | |
| 3277 | MY_COLL_RULE before_extend; | ||
| 3278 | |||
| 3279 | 1062235 | memset(&p->rule.curr, 0, sizeof(p->rule.curr)); | |
| 3280 | |||
| 3281 | /* Scan single shift character or contraction */ | ||
| 3282 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1062235 times.
|
1062235 | if (!my_coll_parser_scan_character_list( |
| 3283 |
1/2✓ Branch 0 taken 1062235 times.
✗ Branch 1 not taken.
|
1062235 | p, p->rule.curr, MY_UCA_MAX_CONTRACTION, "Contraction")) |
| 3284 | ✗ | return 0; | |
| 3285 | |||
| 3286 | 1062235 | before_extend = p->rule; /* Remember the part before "/" */ | |
| 3287 | |||
| 3288 | /* Append the part after "/" as expansion */ | ||
| 3289 |
2/2✓ Branch 0 taken 11641 times.
✓ Branch 1 taken 1050594 times.
|
1062235 | if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) { |
| 3290 |
1/2✓ Branch 0 taken 11641 times.
✗ Branch 1 not taken.
|
11641 | my_coll_parser_scan(p); |
| 3291 |
2/4✓ Branch 0 taken 11641 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 11641 times.
|
11641 | if (!my_coll_parser_scan_character_list(p, p->rule.base, |
| 3292 | MY_UCA_MAX_EXPANSION, "Expansion")) | ||
| 3293 | ✗ | return 0; | |
| 3294 |
2/2✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 846115 times.
|
1050594 | } else if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CONTEXT) { |
| 3295 | /* | ||
| 3296 | We support 2-character long context sequences only: | ||
| 3297 | one character is the previous context, plus the current character. | ||
| 3298 | It's OK as Unicode's CLDR does not have longer examples. | ||
| 3299 | */ | ||
| 3300 |
1/2✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
|
204479 | my_coll_parser_scan(p); |
| 3301 | 204479 | p->rule.with_context = true; | |
| 3302 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 204479 times.
|
204479 | if (!my_coll_parser_scan_character_list( |
| 3303 |
1/2✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
|
204479 | p, p->rule.curr + 1, MY_UCA_MAX_EXPANSION - 1, "context")) |
| 3304 | ✗ | return 0; | |
| 3305 | /* | ||
| 3306 | It might be CONTEXT followed by EXPANSION. For example, Japanese | ||
| 3307 | collation has one rule defined as: | ||
| 3308 | "&[before 3]へ<<<へ|ゝ=べ|ゝ=へ|ゞ/\u3099" | ||
| 3309 | The part of "へ|ゞ/\u3099" is CONTEXT ('|') followed by EXPANSION ('/'). | ||
| 3310 | */ | ||
| 3311 |
2/2✓ Branch 0 taken 44897 times.
✓ Branch 1 taken 159582 times.
|
204479 | if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND) { |
| 3312 |
1/2✓ Branch 0 taken 44897 times.
✗ Branch 1 not taken.
|
44897 | my_coll_parser_scan(p); |
| 3313 | 44897 | size_t len = my_wstrnlen(p->rule.base, MY_UCA_MAX_EXPANSION); | |
| 3314 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 44897 times.
|
44897 | if (!my_coll_parser_scan_character_list( |
| 3315 |
1/2✓ Branch 0 taken 44897 times.
✗ Branch 1 not taken.
|
44897 | p, p->rule.base + len, MY_UCA_MAX_EXPANSION - len, "Expansion")) |
| 3316 | ✗ | return 0; | |
| 3317 | } | ||
| 3318 | } | ||
| 3319 | |||
| 3320 | /* Add rule to the rule list */ | ||
| 3321 |
2/4✓ Branch 0 taken 1062235 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1062235 times.
|
1062235 | if (my_coll_rules_add(p->rules, &p->rule)) return 0; |
| 3322 | |||
| 3323 | 1062235 | p->rule = before_extend; /* Restore to the state before "/" */ | |
| 3324 | |||
| 3325 | 1062235 | return 1; | |
| 3326 | } | ||
| 3327 | |||
| 3328 | /** | ||
| 3329 | Scan shift operator | ||
| 3330 | |||
| 3331 | @<shift@> ::= < | << | <<< | <<<< | = | ||
| 3332 | |||
| 3333 | @param p Collation customization parser | ||
| 3334 | |||
| 3335 | @retval 0 if shift operator was not scanned. | ||
| 3336 | @retval 1 if shift operator was scanned. | ||
| 3337 | */ | ||
| 3338 | 1355707 | static int my_coll_parser_scan_shift(MY_COLL_RULE_PARSER *p) { | |
| 3339 |
2/2✓ Branch 0 taken 1062235 times.
✓ Branch 1 taken 293472 times.
|
1355707 | if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_SHIFT) { |
| 3340 | 1062235 | my_coll_rule_shift_at_level(&p->rule, my_coll_parser_curr(p)->diff); | |
| 3341 | 1062235 | return my_coll_parser_scan(p); | |
| 3342 | } | ||
| 3343 | 293472 | return 0; | |
| 3344 | } | ||
| 3345 | |||
| 3346 | /** | ||
| 3347 | Scan one rule: reset followed by a number of shifts | ||
| 3348 | |||
| 3349 | @<rule@> ::= | ||
| 3350 | & @<reset sequence@> | ||
| 3351 | @<shift@> @<shift sequence@> | ||
| 3352 | [ { @<shift@> @<shift sequence@> }... ] | ||
| 3353 | |||
| 3354 | @param p Collation customization parser | ||
| 3355 | |||
| 3356 | @retval 0 if rule was not scanned. | ||
| 3357 | @retval 1 if rule was scanned. | ||
| 3358 | */ | ||
| 3359 | 293472 | static int my_coll_parser_scan_rule(MY_COLL_RULE_PARSER *p) { | |
| 3360 |
2/4✓ Branch 0 taken 293472 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 293472 times.
|
586944 | if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_RESET) || |
| 3361 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 293472 times.
|
293472 | !my_coll_parser_scan_reset_sequence(p)) |
| 3362 | ✗ | return 0; | |
| 3363 | |||
| 3364 | /* Scan the first required shift command */ | ||
| 3365 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 293471 times.
|
293472 | if (!my_coll_parser_scan_shift(p)) |
| 3366 | 1 | return my_coll_parser_expected_error(p, MY_COLL_LEXEM_SHIFT); | |
| 3367 | |||
| 3368 | /* Scan the first shift sequence */ | ||
| 3369 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 293471 times.
|
293471 | if (!my_coll_parser_scan_shift_sequence(p)) return 0; |
| 3370 | |||
| 3371 | /* Scan subsequent shift rules */ | ||
| 3372 |
2/2✓ Branch 0 taken 768764 times.
✓ Branch 1 taken 293471 times.
|
1062235 | while (my_coll_parser_scan_shift(p)) { |
| 3373 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 768764 times.
|
768764 | if (!my_coll_parser_scan_shift_sequence(p)) return 0; |
| 3374 | } | ||
| 3375 | 293471 | return 1; | |
| 3376 | } | ||
| 3377 | |||
| 3378 | /** | ||
| 3379 | Scan collation customization: settings followed by rules | ||
| 3380 | |||
| 3381 | @<collation customization@> ::= | ||
| 3382 | [ @<setting@> ... ] | ||
| 3383 | [ @<rule@>... ] | ||
| 3384 | |||
| 3385 | @param p Collation customization parser | ||
| 3386 | |||
| 3387 | @retval 0 if collation customization expression was not scanned. | ||
| 3388 | @retval 1 if collation customization expression was scanned. | ||
| 3389 | */ | ||
| 3390 | |||
| 3391 | 29009 | static int my_coll_parser_exec(MY_COLL_RULE_PARSER *p) { | |
| 3392 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 29007 times.
|
29009 | if (!my_coll_parser_scan_settings(p)) return 0; |
| 3393 | |||
| 3394 |
2/2✓ Branch 0 taken 293472 times.
✓ Branch 1 taken 29006 times.
|
322478 | while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_RESET) { |
| 3395 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 293471 times.
|
293472 | if (!my_coll_parser_scan_rule(p)) return 0; |
| 3396 | } | ||
| 3397 | /* Make sure no unparsed input data left */ | ||
| 3398 | 29006 | return my_coll_parser_scan_term(p, MY_COLL_LEXEM_EOF); | |
| 3399 | } | ||
| 3400 | |||
| 3401 | /* | ||
| 3402 | Collation language syntax parser. | ||
| 3403 | Uses lexical parser. | ||
| 3404 | |||
| 3405 | @param rules Collation rule list to load to. | ||
| 3406 | @param str A string with collation customization. | ||
| 3407 | @param str_end End of the string. | ||
| 3408 | @param col_name Collation name | ||
| 3409 | |||
| 3410 | @retval 0 on success | ||
| 3411 | @retval 1 on error | ||
| 3412 | */ | ||
| 3413 | |||
| 3414 | 29009 | static int my_coll_rule_parse(MY_COLL_RULES *rules, const char *str, | |
| 3415 | const char *str_end, const char *col_name) { | ||
| 3416 | MY_COLL_RULE_PARSER p; | ||
| 3417 | |||
| 3418 |
1/2✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
|
29009 | my_coll_parser_init(&p, rules, str, str_end); |
| 3419 | |||
| 3420 |
3/4✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 29006 times.
|
29009 | if (!my_coll_parser_exec(&p)) { |
| 3421 | 3 | rules->loader->errcode = EE_COLLATION_PARSER_ERROR; | |
| 3422 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | my_coll_lexem_print_error(my_coll_parser_curr(&p), rules->loader->errarg, |
| 3423 | sizeof(rules->loader->errarg) - 1, p.errstr, | ||
| 3424 | col_name); | ||
| 3425 | 3 | return 1; | |
| 3426 | } | ||
| 3427 | 29006 | return 0; | |
| 3428 | } | ||
| 3429 | |||
| 3430 | 7536 | static void spread_case_mask(uint16 *to, size_t to_stride, | |
| 3431 | size_t tailored_ce_cnt, uint16 case_mask) { | ||
| 3432 |
2/2✓ Branch 0 taken 18526 times.
✓ Branch 1 taken 7536 times.
|
26062 | for (size_t i = 0; i < tailored_ce_cnt; ++i) { |
| 3433 | 18526 | uint16 *case_weight = &to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride]; | |
| 3434 |
2/2✓ Branch 0 taken 8164 times.
✓ Branch 1 taken 10362 times.
|
18526 | if (*case_weight > CASE_FIRST_UPPER_MASK) |
| 3435 | 8164 | case_mask = *case_weight & 0xFF00; | |
| 3436 |
2/2✓ Branch 0 taken 6751 times.
✓ Branch 1 taken 3611 times.
|
10362 | else if (*case_weight) |
| 3437 | 6751 | *case_weight |= case_mask; | |
| 3438 | } | ||
| 3439 | 7536 | } | |
| 3440 | |||
| 3441 | /* | ||
| 3442 | If the collation is marked as [caseFirst upper], move all of the weights | ||
| 3443 | around to accommodate that. Only tailored weights are changed; for non-tailored | ||
| 3444 | weights, we do it on-the-fly in uca_scanner_900::apply_case_first(). | ||
| 3445 | |||
| 3446 | [caseFirst upper] is a directive that says that case should override all | ||
| 3447 | other tertiary case concerns (in a sense, a “level 2.5”), and furthermore, | ||
| 3448 | that uppercase should come before lowercase. (Normally lowercase sorts | ||
| 3449 | before uppercase.) It is currently only used in the Danish collation. | ||
| 3450 | |||
| 3451 | This is done by looking at the tertiary weight, inferring the case from it, | ||
| 3452 | and then using the upper bits (which are normally unused) to signal the case. | ||
| 3453 | The algorithm is detailed in Unicode TR35, section 3.14, although we don't | ||
| 3454 | seem to follow it exactly. | ||
| 3455 | */ | ||
| 3456 | 1250794 | static void change_weight_if_case_first(CHARSET_INFO *cs, | |
| 3457 | const MY_UCA_INFO *dst, MY_COLL_RULE *r, | ||
| 3458 | uint16 *to, size_t to_stride, | ||
| 3459 | size_t curr_len, | ||
| 3460 | size_t tailored_ce_cnt) { | ||
| 3461 | /* We only need to implement [caseFirst upper] right now. */ | ||
| 3462 |
4/4✓ Branch 0 taken 518711 times.
✓ Branch 1 taken 732083 times.
✓ Branch 2 taken 7536 times.
✓ Branch 3 taken 511175 times.
|
1250794 | if (!(cs->coll_param && cs->coll_param->case_first == CASE_FIRST_UPPER && |
| 3463 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7536 times.
|
7536 | cs->levels_for_compare == 3)) |
| 3464 | 1243258 | return; | |
| 3465 | |||
| 3466 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7536 times.
|
7536 | assert(cs->uca->version == UCA_V900); |
| 3467 | |||
| 3468 | // How many CEs this character has with non-ignorable primary weight. | ||
| 3469 | 7536 | int tailored_pri_cnt = 0; | |
| 3470 | 7536 | int origin_pri_cnt = 0; | |
| 3471 |
2/2✓ Branch 0 taken 12089 times.
✓ Branch 1 taken 4239 times.
|
16328 | for (size_t i = 0; i < tailored_ce_cnt; ++i) { |
| 3472 | /* | ||
| 3473 | If rule A has already applied a case weight change, and we have rule B | ||
| 3474 | which is inherited from A, apply the same case weight change on the rest | ||
| 3475 | of rule B and return. | ||
| 3476 | */ | ||
| 3477 |
2/2✓ Branch 0 taken 3297 times.
✓ Branch 1 taken 8792 times.
|
12089 | if (to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] > CASE_FIRST_UPPER_MASK) { |
| 3478 | 3297 | spread_case_mask(to, to_stride, tailored_ce_cnt, /*case_mask=*/0); | |
| 3479 | 3297 | return; | |
| 3480 | } | ||
| 3481 |
2/2✓ Branch 0 taken 7222 times.
✓ Branch 1 taken 1570 times.
|
8792 | if (to[i * MY_UCA_900_CE_SIZE * to_stride]) tailored_pri_cnt++; |
| 3482 | } | ||
| 3483 |
3/4✓ Branch 0 taken 1570 times.
✓ Branch 1 taken 2669 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1570 times.
|
4239 | if (r->before_level == 1 || r->diff[0]) tailored_pri_cnt--; |
| 3484 | |||
| 3485 | // Use the DUCET weight to detect the character's case. | ||
| 3486 | 4239 | MY_UCA_INFO *src = &my_uca_v900; | |
| 3487 | 4239 | int changed_ce = 0; | |
| 3488 | |||
| 3489 | 4239 | my_wc_t *curr = r->curr; | |
| 3490 |
2/2✓ Branch 0 taken 4710 times.
✓ Branch 1 taken 4239 times.
|
8949 | for (size_t i = 0; i < curr_len; ++i) { |
| 3491 | 4710 | const uint16 *from = my_char_weight_addr_900(src, *curr); | |
| 3492 | 4710 | uint page = *curr >> 8; | |
| 3493 | 4710 | uint code = *curr & 0xFF; | |
| 3494 | 4710 | curr++; | |
| 3495 | 4710 | int ce_cnt = | |
| 3496 |
1/2✓ Branch 0 taken 4710 times.
✗ Branch 1 not taken.
|
4710 | src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0; |
| 3497 |
2/2✓ Branch 0 taken 8792 times.
✓ Branch 1 taken 4710 times.
|
13502 | for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) { |
| 3498 |
2/2✓ Branch 0 taken 5338 times.
✓ Branch 1 taken 3454 times.
|
8792 | if (from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS]) origin_pri_cnt++; |
| 3499 | } | ||
| 3500 | } | ||
| 3501 | 4239 | int case_to_copy = 0; | |
| 3502 |
2/2✓ Branch 0 taken 3140 times.
✓ Branch 1 taken 1099 times.
|
4239 | if (origin_pri_cnt <= tailored_pri_cnt) |
| 3503 | 3140 | case_to_copy = origin_pri_cnt; | |
| 3504 | else | ||
| 3505 | 1099 | case_to_copy = tailored_pri_cnt - 1; | |
| 3506 | 4239 | int upper_cnt = 0; | |
| 3507 | 4239 | int lower_cnt = 0; | |
| 3508 | 4239 | curr = r->curr; | |
| 3509 | 4239 | uint16 case_mask = 0; | |
| 3510 |
2/2✓ Branch 0 taken 4710 times.
✓ Branch 1 taken 4239 times.
|
8949 | for (size_t curr_ind = 0; curr_ind < curr_len; ++curr_ind) { |
| 3511 | 4710 | const uint16 *from = my_char_weight_addr_900(src, *curr); | |
| 3512 | 4710 | uint page = *curr >> 8; | |
| 3513 | 4710 | uint code = *curr & 0xFF; | |
| 3514 | 4710 | curr++; | |
| 3515 | 4710 | int ce_cnt = | |
| 3516 |
1/2✓ Branch 0 taken 4710 times.
✗ Branch 1 not taken.
|
4710 | src->weights[page] ? UCA900_NUM_OF_CE(src->weights[page], code) : 0; |
| 3517 | 4710 | changed_ce = 0; | |
| 3518 |
2/2✓ Branch 0 taken 8792 times.
✓ Branch 1 taken 4710 times.
|
13502 | for (int i_ce = 0; i_ce < ce_cnt; ++i_ce) { |
| 3519 | 8792 | uint16 primary_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS]; | |
| 3520 |
2/2✓ Branch 0 taken 5338 times.
✓ Branch 1 taken 3454 times.
|
8792 | if (primary_weight) { |
| 3521 | 5338 | uint16 case_weight = from[i_ce * UCA900_DISTANCE_BETWEEN_WEIGHTS + | |
| 3522 | 5338 | 2 * UCA900_DISTANCE_BETWEEN_LEVELS]; | |
| 3523 | 5338 | uint16 *ce_to = nullptr; | |
| 3524 |
2/2✓ Branch 0 taken 2669 times.
✓ Branch 1 taken 2669 times.
|
5338 | if (is_tertiary_weight_upper_case(case_weight)) { |
| 3525 |
2/2✓ Branch 0 taken 1099 times.
✓ Branch 1 taken 1570 times.
|
2669 | if (!case_to_copy) |
| 3526 | 1099 | upper_cnt++; | |
| 3527 | else | ||
| 3528 | 1570 | case_mask = CASE_FIRST_UPPER_MASK; | |
| 3529 | } else { | ||
| 3530 |
2/2✓ Branch 0 taken 1099 times.
✓ Branch 1 taken 1570 times.
|
2669 | if (!case_to_copy) |
| 3531 | 1099 | lower_cnt++; | |
| 3532 | else | ||
| 3533 | 1570 | case_mask = CASE_FIRST_LOWER_MASK; | |
| 3534 | } | ||
| 3535 |
2/2✓ Branch 0 taken 3140 times.
✓ Branch 1 taken 2198 times.
|
5338 | if (case_to_copy) { |
| 3536 | do { | ||
| 3537 | 3140 | ce_to = to + changed_ce * MY_UCA_900_CE_SIZE * to_stride; | |
| 3538 | 3140 | changed_ce++; | |
| 3539 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3140 times.
|
3140 | } while (*ce_to == 0); |
| 3540 | 3140 | ce_to[2 * to_stride] |= case_mask; | |
| 3541 | 3140 | case_to_copy--; | |
| 3542 | } | ||
| 3543 | } | ||
| 3544 | } | ||
| 3545 | } | ||
| 3546 |
2/2✓ Branch 0 taken 3140 times.
✓ Branch 1 taken 1099 times.
|
4239 | if (origin_pri_cnt <= tailored_pri_cnt) { |
| 3547 |
2/2✓ Branch 0 taken 314 times.
✓ Branch 1 taken 3140 times.
|
3454 | for (int i = origin_pri_cnt; i < tailored_pri_cnt; ++i) { |
| 3548 | 314 | const int offset = changed_ce * MY_UCA_900_CE_SIZE * to_stride; | |
| 3549 |
2/4✓ Branch 0 taken 314 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 314 times.
✗ Branch 3 not taken.
|
314 | if (to[offset] && to[offset] < dst->extra_ce_pri_base) |
| 3550 | 314 | to[offset + 2 * to_stride] = 0; | |
| 3551 | } | ||
| 3552 | } else { | ||
| 3553 |
4/4✓ Branch 0 taken 628 times.
✓ Branch 1 taken 471 times.
✓ Branch 2 taken 157 times.
✓ Branch 3 taken 471 times.
|
1099 | if (upper_cnt && lower_cnt) |
| 3554 | 157 | case_mask = CASE_FIRST_MIXED_MASK; | |
| 3555 |
3/4✓ Branch 0 taken 471 times.
✓ Branch 1 taken 471 times.
✓ Branch 2 taken 471 times.
✗ Branch 3 not taken.
|
942 | else if (upper_cnt && !lower_cnt) |
| 3556 | 471 | case_mask = CASE_FIRST_UPPER_MASK; | |
| 3557 | else | ||
| 3558 | 471 | case_mask = CASE_FIRST_LOWER_MASK; | |
| 3559 | 1099 | bool skipped_extra_ce = false; | |
| 3560 |
2/2✓ Branch 0 taken 2198 times.
✓ Branch 1 taken 1099 times.
|
3297 | for (int i = tailored_ce_cnt - 1; i >= 0; --i) { |
| 3561 | 2198 | int offset = i * MY_UCA_900_CE_SIZE * to_stride; | |
| 3562 |
3/4✓ Branch 0 taken 2198 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1099 times.
✓ Branch 3 taken 1099 times.
|
2198 | if (to[offset] && to[offset] < dst->extra_ce_pri_base) { |
| 3563 |
2/6✗ Branch 0 not taken.
✓ Branch 1 taken 1099 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1099 times.
✗ Branch 5 not taken.
|
1099 | if ((r->before_level == 1 || r->diff[0]) && !skipped_extra_ce) { |
| 3564 | 1099 | skipped_extra_ce = true; | |
| 3565 | 1099 | continue; | |
| 3566 | } | ||
| 3567 | ✗ | to[(i * MY_UCA_900_CE_SIZE + 2) * to_stride] |= case_mask; | |
| 3568 | ✗ | break; | |
| 3569 | } | ||
| 3570 | } | ||
| 3571 | } | ||
| 3572 | 4239 | spread_case_mask(to, to_stride, tailored_ce_cnt, case_mask); | |
| 3573 | } | ||
| 3574 | |||
| 3575 | 734767 | static size_t my_char_weight_put_900(MY_UCA_INFO *dst, uint16 *to, | |
| 3576 | size_t to_stride, size_t to_length, | ||
| 3577 | uint16 *to_num_ce, | ||
| 3578 | const MY_COLL_RULE *rule, | ||
| 3579 | size_t base_len) { | ||
| 3580 | size_t count; | ||
| 3581 | 734767 | int total_ce_cnt = 0; | |
| 3582 | |||
| 3583 | 734767 | const my_wc_t *base = rule->base; | |
| 3584 |
2/2✓ Branch 0 taken 1034642 times.
✓ Branch 1 taken 734767 times.
|
1769409 | for (count = 0; base_len;) { |
| 3585 | 1034642 | const uint16 *from = nullptr; | |
| 3586 | 1034642 | size_t from_stride = 0; | |
| 3587 | 1034642 | int ce_cnt = 0; | |
| 3588 | |||
| 3589 |
2/2✓ Branch 0 taken 366380 times.
✓ Branch 1 taken 1015819 times.
|
1382199 | for (size_t chlen = base_len; chlen > 1; chlen--) { |
| 3590 |
3/4✓ Branch 0 taken 366380 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 18823 times.
✓ Branch 3 taken 347557 times.
|
366380 | if ((from = my_uca_contraction_weight(dst->contraction_nodes, base, |
| 3591 | chlen))) { | ||
| 3592 | 18823 | from_stride = 1; | |
| 3593 | 18823 | base += chlen; | |
| 3594 | 18823 | base_len -= chlen; | |
| 3595 | 18823 | ce_cnt = *(from + MY_UCA_MAX_WEIGHT_SIZE - 1); | |
| 3596 | 18823 | break; | |
| 3597 | } | ||
| 3598 | } | ||
| 3599 | |||
| 3600 |
2/2✓ Branch 0 taken 1015819 times.
✓ Branch 1 taken 18823 times.
|
1034642 | if (!from) { |
| 3601 | 1015819 | uint page = *base >> 8; | |
| 3602 | 1015819 | uint code = *base & 0xFF; | |
| 3603 | 1015819 | base++; | |
| 3604 | 1015819 | base_len--; | |
| 3605 |
1/2✓ Branch 0 taken 1015819 times.
✗ Branch 1 not taken.
|
1015819 | if (dst->weights[page]) { |
| 3606 | 1015819 | from = UCA900_WEIGHT_ADDR(dst->weights[page], /*level=*/0, code); | |
| 3607 | 1015819 | from_stride = UCA900_DISTANCE_BETWEEN_LEVELS; | |
| 3608 | 1015819 | ce_cnt = UCA900_NUM_OF_CE(dst->weights[page], code); | |
| 3609 | } | ||
| 3610 | } | ||
| 3611 | |||
| 3612 | 1034642 | for (int weight_ind = 0; | |
| 3613 |
3/4✓ Branch 0 taken 3741231 times.
✓ Branch 1 taken 1034642 times.
✓ Branch 2 taken 3741231 times.
✗ Branch 3 not taken.
|
4775873 | weight_ind < ce_cnt * MY_UCA_900_CE_SIZE && count < to_length; |
| 3614 | weight_ind++) { | ||
| 3615 | 3741231 | *to = *from; | |
| 3616 | 3741231 | to += to_stride; | |
| 3617 | 3741231 | from += from_stride; | |
| 3618 | 3741231 | count++; | |
| 3619 | } | ||
| 3620 | 1034642 | total_ce_cnt += ce_cnt; | |
| 3621 | } | ||
| 3622 | |||
| 3623 | /* | ||
| 3624 | For shift on primary weight, there might not be enough room in the tables. | ||
| 3625 | For example, Sinhala has the rule "&\\u0DA5 < \\u0DA4", which means | ||
| 3626 | that we should move U+0DA4 after U+0DA5 (on the primary level). | ||
| 3627 | However, there is no room after U+0DA5 in DUCET unless we wanted to | ||
| 3628 | conflict with U+0DA6: | ||
| 3629 | |||
| 3630 | 0DA4 ; [.28EC.0020.0002] # SINHALA LETTER TAALUJA NAASIKYAYA | ||
| 3631 | 0DA5 ; [.28ED.0020.0002] # SINHALA LETTER TAALUJA SANYOOGA NAAKSIKYAYA | ||
| 3632 | 0DA6 ; [.28EE.0020.0002] # SINHALA LETTER SANYAKA JAYANNA | ||
| 3633 | |||
| 3634 | Before our implementation of UCA 9.0.0, the shift on primary weight was | ||
| 3635 | done by making it a fake expansion when parsing the rule, where we'd expand | ||
| 3636 | U+0DA4 to U+0DA5 U+MMMM, MMMM being 'last_non_ignorable'. (This happens | ||
| 3637 | in my_coll_parser_scan_reset_sequence()). But from UCA 9.0.0, we also | ||
| 3638 | support accent- and case-sensitive collations, and then, having the extra | ||
| 3639 | weights of 'last_non_ignorable' (which is just a random character) on the | ||
| 3640 | second and third level may cause unexpected results for algorithms that | ||
| 3641 | use the meaning of the tertiary weight to infer case. Thus, we'll abandon | ||
| 3642 | the fake expansion way; instead, we add an extra CE (after the one | ||
| 3643 | from U+0DA5, the character we are moving after) to represent all the | ||
| 3644 | weights we might want to shift. The actual shifting happens in | ||
| 3645 | apply_shift_900(). | ||
| 3646 | |||
| 3647 | For the rule "&\\u0DA5 < \\u0DA4", U+0DA4's weights become | ||
| 3648 | [.28ED.0020.0002][.54A4.0000.0000], where 0x54A4 is the value of | ||
| 3649 | extra_ce_pri_base. We then apply the differences from the rule | ||
| 3650 | (which are never negative) to the last CE, so that it becomes | ||
| 3651 | e.g. [.54A5.0000.0000]. | ||
| 3652 | */ | ||
| 3653 |
7/8✓ Branch 0 taken 617612 times.
✓ Branch 1 taken 117155 times.
✓ Branch 2 taken 584834 times.
✓ Branch 3 taken 32778 times.
✓ Branch 4 taken 287768 times.
✓ Branch 5 taken 297066 times.
✓ Branch 6 taken 437701 times.
✗ Branch 7 not taken.
|
734767 | if ((rule->diff[0] || rule->diff[1] || rule->diff[2]) && count < to_length) { |
| 3654 |
2/2✓ Branch 0 taken 117155 times.
✓ Branch 1 taken 320546 times.
|
437701 | *to = rule->diff[0] ? dst->extra_ce_pri_base : 0; |
| 3655 | 437701 | to += to_stride; | |
| 3656 |
2/2✓ Branch 0 taken 46742 times.
✓ Branch 1 taken 390959 times.
|
437701 | *to = rule->diff[1] ? dst->extra_ce_sec_base : 0; |
| 3657 | 437701 | to += to_stride; | |
| 3658 |
2/2✓ Branch 0 taken 371062 times.
✓ Branch 1 taken 66639 times.
|
437701 | *to = rule->diff[2] ? dst->extra_ce_ter_base : 0; |
| 3659 | 437701 | to += to_stride; | |
| 3660 | 437701 | total_ce_cnt++; | |
| 3661 | 437701 | count += 3; | |
| 3662 | } | ||
| 3663 | 734767 | total_ce_cnt = | |
| 3664 | 734767 | std::min(total_ce_cnt, (MY_UCA_MAX_WEIGHT_SIZE - 1) / MY_UCA_900_CE_SIZE); | |
| 3665 | 734767 | *to_num_ce = total_ce_cnt; | |
| 3666 | |||
| 3667 | 734767 | return total_ce_cnt; | |
| 3668 | } | ||
| 3669 | |||
| 3670 | /** | ||
| 3671 | Helper function: | ||
| 3672 | Copies UCA weights for a given "uint" string | ||
| 3673 | to the given location. | ||
| 3674 | |||
| 3675 | @param dst destination UCA weight data | ||
| 3676 | @param to destination address | ||
| 3677 | @param to_stride number of uint16 elements between each successive weight in "to" | ||
| 3678 | @param to_length size of destination | ||
| 3679 | @param to_num_ce where to put the number of CEs generated | ||
| 3680 | @param rule The rule that contains the characters whose weights | ||
| 3681 | are to be copied | ||
| 3682 | @param base_len The length of base character list | ||
| 3683 | @param uca_ver UCA version | ||
| 3684 | |||
| 3685 | @return number of weights put | ||
| 3686 | */ | ||
| 3687 | |||
| 3688 | 1250794 | static size_t my_char_weight_put(MY_UCA_INFO *dst, uint16 *to, size_t to_stride, | |
| 3689 | size_t to_length, uint16 *to_num_ce, | ||
| 3690 | const MY_COLL_RULE *rule, size_t base_len, | ||
| 3691 | enum_uca_ver uca_ver) { | ||
| 3692 |
2/2✓ Branch 0 taken 734767 times.
✓ Branch 1 taken 516027 times.
|
1250794 | if (uca_ver == UCA_V900) |
| 3693 | 734767 | return my_char_weight_put_900(dst, to, to_stride, to_length, to_num_ce, | |
| 3694 | 734767 | rule, base_len); | |
| 3695 | |||
| 3696 | 516027 | const my_wc_t *base = rule->base; | |
| 3697 | 516027 | size_t count = 0; | |
| 3698 |
2/2✓ Branch 0 taken 525319 times.
✓ Branch 1 taken 516027 times.
|
1041346 | while (base_len != 0) { |
| 3699 | 525319 | const uint16 *from = nullptr; | |
| 3700 | |||
| 3701 |
2/2✓ Branch 0 taken 10608 times.
✓ Branch 1 taken 524115 times.
|
534723 | for (size_t chlen = base_len; chlen > 1; chlen--) { |
| 3702 |
2/2✓ Branch 0 taken 1204 times.
✓ Branch 1 taken 9404 times.
|
10608 | if ((from = my_uca_contraction_weight(dst->contraction_nodes, base, |
| 3703 | chlen))) { | ||
| 3704 | 1204 | base += chlen; | |
| 3705 | 1204 | base_len -= chlen; | |
| 3706 | 1204 | break; | |
| 3707 | } | ||
| 3708 | } | ||
| 3709 | |||
| 3710 |
2/2✓ Branch 0 taken 524115 times.
✓ Branch 1 taken 1204 times.
|
525319 | if (!from) { |
| 3711 | 524115 | from = my_char_weight_addr(dst, *base); | |
| 3712 | 524115 | base++; | |
| 3713 | 524115 | base_len--; | |
| 3714 | } | ||
| 3715 | |||
| 3716 |
4/6✓ Branch 0 taken 1047646 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 522327 times.
✓ Branch 3 taken 525319 times.
✓ Branch 4 taken 522327 times.
✗ Branch 5 not taken.
|
1047646 | for (; from && *from && count < to_length;) { |
| 3717 | 522327 | *to = *from++; | |
| 3718 | 522327 | to += to_stride; | |
| 3719 | 522327 | count++; | |
| 3720 | } | ||
| 3721 | } | ||
| 3722 | |||
| 3723 | 516027 | *to = 0; | |
| 3724 | 516027 | return count; | |
| 3725 | } | ||
| 3726 | |||
| 3727 | /** | ||
| 3728 | Alloc new page and copy the default UCA weights | ||
| 3729 | @param cs Character set | ||
| 3730 | @param loader Character set loader | ||
| 3731 | @param src Default UCA data to copy from | ||
| 3732 | @param dst UCA data to copy weights to | ||
| 3733 | @param page page number | ||
| 3734 | |||
| 3735 | @retval false on success | ||
| 3736 | @retval true on error | ||
| 3737 | */ | ||
| 3738 | 81306 | static bool my_uca_copy_page(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, | |
| 3739 | const MY_UCA_INFO *src, MY_UCA_INFO *dst, | ||
| 3740 | size_t page) { | ||
| 3741 | 81306 | const uint dst_size = 256 * dst->lengths[page] * sizeof(uint16); | |
| 3742 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 81306 times.
|
81306 | if (!(dst->weights[page] = (uint16 *)(loader->once_alloc)(dst_size))) |
| 3743 | ✗ | return true; | |
| 3744 | |||
| 3745 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 81306 times.
|
81306 | assert(src->lengths[page] <= dst->lengths[page]); |
| 3746 | 81306 | memset(dst->weights[page], 0, dst_size); | |
| 3747 |
3/4✓ Branch 0 taken 81306 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 50238 times.
✓ Branch 3 taken 31068 times.
|
81306 | if (cs->uca && cs->uca->version == UCA_V900) { |
| 3748 | 50238 | const uint src_size = 256 * src->lengths[page] * sizeof(uint16); | |
| 3749 | 50238 | memcpy(dst->weights[page], src->weights[page], src_size); | |
| 3750 |
2/2✓ Branch 0 taken 30734 times.
✓ Branch 1 taken 334 times.
|
81306 | } else if (src->lengths[page] > 0) { |
| 3751 |
2/2✓ Branch 0 taken 7867904 times.
✓ Branch 1 taken 30734 times.
|
7898638 | for (uint chc = 0; chc < 256; chc++) { |
| 3752 | 7867904 | memcpy(dst->weights[page] + chc * dst->lengths[page], | |
| 3753 | 7867904 | src->weights[page] + chc * src->lengths[page], | |
| 3754 | 7867904 | src->lengths[page] * sizeof(uint16)); | |
| 3755 | } | ||
| 3756 | } | ||
| 3757 | 81306 | return false; | |
| 3758 | } | ||
| 3759 | |||
| 3760 | /* | ||
| 3761 | This is used to apply the weight shift if there is a [before 1] rule. | ||
| 3762 | If we have a rule "&[before 1] A < B < C", and A's collation element is [P, S, | ||
| 3763 | T], then in my_char_weight_put_900(), we append one extra collation element to | ||
| 3764 | A's CE to be B and C's CE. So B and C's CE becomes [P, S, T][p, 0, 0]. What we | ||
| 3765 | do with this function is to change B's CE to [P - 1, S, T][p + n, 0, 0]. | ||
| 3766 | 1. The rule "&[before 1] A < B < C" means "B < C < A" on primary level. Since | ||
| 3767 | "B < A", so we give B the first primary weight as (P - 1). | ||
| 3768 | 2. p is a weight value which is the maximum regular primary weight in DUCET | ||
| 3769 | plus one (0x54A3 + 1 = 0x54A4). This is to make sure B's primary weight is | ||
| 3770 | less than A's and greater than that of any character which sorts before A. | ||
| 3771 | 3. n is the number of characters in this rule's character list. For the B in | ||
| 3772 | this rule, n = 1. For the C in this rule, n = 2. This can make sure "B < | ||
| 3773 | C". | ||
| 3774 | |||
| 3775 | It is the same thing that apply_secondary_shift_900() and | ||
| 3776 | apply_tertiary_shift_900() do, but on different weight levels. | ||
| 3777 | */ | ||
| 3778 | 38970 | static bool apply_primary_shift_900(MY_CHARSET_LOADER *loader, | |
| 3779 | MY_COLL_RULES *rules, MY_COLL_RULE *r, | ||
| 3780 | uint16 *to, size_t to_stride, | ||
| 3781 | size_t nweights, | ||
| 3782 | uint16 *const last_weight_ptr) { | ||
| 3783 | /* | ||
| 3784 | Find the second-to-last non-ignorable primary weight to apply shift, | ||
| 3785 | because the last one is the extra CE we added in my_char_weight_put_900(). | ||
| 3786 | */ | ||
| 3787 | 38970 | int last_sec_pri = 0; | |
| 3788 |
1/2✓ Branch 0 taken 38970 times.
✗ Branch 1 not taken.
|
38970 | for (last_sec_pri = nweights - 2; last_sec_pri >= 0; --last_sec_pri) { |
| 3789 |
1/2✓ Branch 0 taken 38970 times.
✗ Branch 1 not taken.
|
38970 | if (to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]) break; |
| 3790 | } | ||
| 3791 |
1/2✓ Branch 0 taken 38970 times.
✗ Branch 1 not taken.
|
38970 | if (last_sec_pri >= 0) { |
| 3792 | 38970 | to[last_sec_pri * to_stride * MY_UCA_900_CE_SIZE]--; /* Reset before */ | |
| 3793 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 38970 times.
|
38970 | if (rules->shift_after_method == my_shift_method_expand) { |
| 3794 | /* | ||
| 3795 | Special case. Don't let characters shifted after X | ||
| 3796 | and before next(X) intermix to each other. | ||
| 3797 | |||
| 3798 | For example: | ||
| 3799 | "[shift-after-method expand] &0 < a &[before primary]1 < A". | ||
| 3800 | I.e. we reorder 'a' after '0', and then 'A' before '1'. | ||
| 3801 | 'a' must be sorted before 'A'. | ||
| 3802 | |||
| 3803 | Note, there are no real collations in CLDR which shift | ||
| 3804 | after and before two neighbouring characters. We need this | ||
| 3805 | just in case. Reserving 4096 (0x1000) weights for such | ||
| 3806 | cases is perfectly enough. | ||
| 3807 | */ | ||
| 3808 | /* W3-TODO: const may vary on levels 2,3*/ | ||
| 3809 | ✗ | last_weight_ptr[0] += 0x1000; | |
| 3810 | } | ||
| 3811 | } else { | ||
| 3812 | ✗ | loader->errcode = EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR; | |
| 3813 | ✗ | snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]); | |
| 3814 | ✗ | return true; | |
| 3815 | } | ||
| 3816 | 38970 | return false; | |
| 3817 | } | ||
| 3818 | |||
| 3819 | /* | ||
| 3820 | This is used to apply the weight shift if there is a [before 2] rule. Please | ||
| 3821 | see the comment on apply_primary_shift_900(). | ||
| 3822 | */ | ||
| 3823 | 8960 | static bool apply_secondary_shift_900(MY_CHARSET_LOADER *loader, | |
| 3824 | MY_COLL_RULES *rules, MY_COLL_RULE *r, | ||
| 3825 | uint16 *to, size_t to_stride, | ||
| 3826 | size_t nweights, | ||
| 3827 | uint16 *const last_weight_ptr) { | ||
| 3828 | /* | ||
| 3829 | Find the second-to-last non-ignorable secondary weight to apply shift, | ||
| 3830 | because the last one is the extra CE we added in my_char_weight_put_900(). | ||
| 3831 | */ | ||
| 3832 | int last_sec_sec; | ||
| 3833 |
1/2✓ Branch 0 taken 8960 times.
✗ Branch 1 not taken.
|
8960 | for (last_sec_sec = nweights - 2; last_sec_sec >= 0; --last_sec_sec) { |
| 3834 |
1/2✓ Branch 0 taken 8960 times.
✗ Branch 1 not taken.
|
8960 | if (to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]) break; |
| 3835 | } | ||
| 3836 |
1/2✓ Branch 0 taken 8960 times.
✗ Branch 1 not taken.
|
8960 | if (last_sec_sec >= 0) { |
| 3837 | // Reset before. | ||
| 3838 | 8960 | to[last_sec_sec * MY_UCA_900_CE_SIZE * to_stride + to_stride]--; | |
| 3839 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8960 times.
|
8960 | if (rules->shift_after_method == my_shift_method_expand) { |
| 3840 | /* | ||
| 3841 | Same reason as in apply_primary_shift_900(), reserve 256 (0x100) | ||
| 3842 | weights for secondary level. | ||
| 3843 | */ | ||
| 3844 | ✗ | last_weight_ptr[to_stride] += 0x100; | |
| 3845 | } | ||
| 3846 | } else { | ||
| 3847 | ✗ | loader->errcode = EE_FAILED_TO_RESET_BEFORE_SECONDARY_IGNORABLE_CHAR; | |
| 3848 | ✗ | snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]); | |
| 3849 | ✗ | return true; | |
| 3850 | } | ||
| 3851 | 8960 | return false; | |
| 3852 | } | ||
| 3853 | |||
| 3854 | /* | ||
| 3855 | This is used to apply the weight shift if there is a [before 3] rule. Please | ||
| 3856 | see the comment on apply_primary_shift_900(). | ||
| 3857 | */ | ||
| 3858 | 204459 | static bool apply_tertiary_shift_900(MY_CHARSET_LOADER *loader, | |
| 3859 | MY_COLL_RULES *rules, MY_COLL_RULE *r, | ||
| 3860 | uint16 *to, size_t to_stride, | ||
| 3861 | size_t nweights, | ||
| 3862 | uint16 *const last_weight_ptr) { | ||
| 3863 | /* | ||
| 3864 | Find the second-to-last non-ignorable tertiary weight to apply shift, | ||
| 3865 | because the last one is the extra CE we added in my_char_weight_put_900(). | ||
| 3866 | */ | ||
| 3867 | int last_sec_ter; | ||
| 3868 |
1/2✓ Branch 0 taken 204459 times.
✗ Branch 1 not taken.
|
204459 | for (last_sec_ter = nweights - 2; last_sec_ter >= 0; --last_sec_ter) { |
| 3869 |
1/2✓ Branch 0 taken 204459 times.
✗ Branch 1 not taken.
|
204459 | if (to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride]) |
| 3870 | 204459 | break; | |
| 3871 | } | ||
| 3872 |
1/2✓ Branch 0 taken 204459 times.
✗ Branch 1 not taken.
|
204459 | if (last_sec_ter >= 0) { |
| 3873 | // Reset before. | ||
| 3874 | 204459 | to[last_sec_ter * MY_UCA_900_CE_SIZE * to_stride + 2 * to_stride]--; | |
| 3875 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 204459 times.
|
204459 | if (rules->shift_after_method == my_shift_method_expand) { |
| 3876 | /* | ||
| 3877 | Same reason as in apply_primary_shift_900(), reserve 16 (0x10) | ||
| 3878 | weights for tertiary level. | ||
| 3879 | */ | ||
| 3880 | ✗ | last_weight_ptr[to_stride * 2] += 0x10; | |
| 3881 | } | ||
| 3882 | } else { | ||
| 3883 | ✗ | loader->errcode = EE_FAILED_TO_RESET_BEFORE_TERTIARY_IGNORABLE_CHAR; | |
| 3884 | ✗ | snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]); | |
| 3885 | ✗ | return true; | |
| 3886 | } | ||
| 3887 | 204459 | return false; | |
| 3888 | } | ||
| 3889 | |||
| 3890 | 734767 | static bool apply_shift_900(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, | |
| 3891 | MY_COLL_RULE *r, uint16 *to, size_t to_stride, | ||
| 3892 | size_t nweights) { | ||
| 3893 | // nweights should not less than 1 because of the extra CE. | ||
| 3894 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 734767 times.
|
734767 | assert(nweights); |
| 3895 | // Apply level difference. | ||
| 3896 | 734767 | uint16 *const last_weight_ptr = | |
| 3897 | 734767 | to + (nweights - 1) * to_stride * MY_UCA_900_CE_SIZE; | |
| 3898 | 734767 | last_weight_ptr[0] += r->diff[0]; | |
| 3899 | 734767 | last_weight_ptr[to_stride] += r->diff[1]; | |
| 3900 | 734767 | last_weight_ptr[to_stride * 2] += r->diff[2]; | |
| 3901 |
2/2✓ Branch 0 taken 38970 times.
✓ Branch 1 taken 695797 times.
|
734767 | if (r->before_level == 1) // Apply "&[before primary]". |
| 3902 | 38970 | return apply_primary_shift_900(loader, rules, r, to, to_stride, nweights, | |
| 3903 | 38970 | last_weight_ptr); | |
| 3904 |
2/2✓ Branch 0 taken 8960 times.
✓ Branch 1 taken 686837 times.
|
695797 | else if (r->before_level == 2) // Apply "[before 2]". |
| 3905 | 8960 | return apply_secondary_shift_900(loader, rules, r, to, to_stride, nweights, | |
| 3906 | 8960 | last_weight_ptr); | |
| 3907 |
2/2✓ Branch 0 taken 204459 times.
✓ Branch 1 taken 482378 times.
|
686837 | else if (r->before_level == 3) // Apply "[before 3]". |
| 3908 | 204459 | return apply_tertiary_shift_900(loader, rules, r, to, to_stride, nweights, | |
| 3909 | 204459 | last_weight_ptr); | |
| 3910 | 482378 | return false; | |
| 3911 | } | ||
| 3912 | |||
| 3913 | 1250794 | static bool apply_shift(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, | |
| 3914 | MY_COLL_RULE *r, int level, uint16 *to, | ||
| 3915 | size_t to_stride, size_t nweights) { | ||
| 3916 |
2/2✓ Branch 0 taken 734767 times.
✓ Branch 1 taken 516027 times.
|
1250794 | if (rules->uca->version == UCA_V900) |
| 3917 | 734767 | return apply_shift_900(loader, rules, r, to, to_stride, nweights); | |
| 3918 | |||
| 3919 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 516027 times.
|
516027 | assert(to_stride == 1); |
| 3920 | |||
| 3921 | /* Apply level difference. */ | ||
| 3922 |
2/2✓ Branch 0 taken 512747 times.
✓ Branch 1 taken 3280 times.
|
516027 | if (nweights) { |
| 3923 | 512747 | to[nweights - 1] += r->diff[0]; | |
| 3924 |
2/2✓ Branch 0 taken 186 times.
✓ Branch 1 taken 512561 times.
|
512747 | if (r->before_level == 1) /* Apply "&[before primary]" */ |
| 3925 | { | ||
| 3926 |
2/2✓ Branch 0 taken 184 times.
✓ Branch 1 taken 2 times.
|
186 | if (nweights >= 2) { |
| 3927 | 184 | to[nweights - 2]--; /* Reset before */ | |
| 3928 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 128 times.
|
184 | if (rules->shift_after_method == my_shift_method_expand) { |
| 3929 | /* | ||
| 3930 | Special case. Don't let characters shifted after X | ||
| 3931 | and before next(X) intermix to each other. | ||
| 3932 | |||
| 3933 | For example: | ||
| 3934 | "[shift-after-method expand] &0 < a &[before primary]1 < A". | ||
| 3935 | I.e. we reorder 'a' after '0', and then 'A' before '1'. | ||
| 3936 | 'a' must be sorted before 'A'. | ||
| 3937 | |||
| 3938 | Note, there are no real collations in CLDR which shift | ||
| 3939 | after and before two neighbour characters. We need this | ||
| 3940 | just in case. Reserving 4096 (0x1000) weights for such | ||
| 3941 | cases is perfectly enough. | ||
| 3942 | */ | ||
| 3943 | /* W3-TODO: const may vary on levels 2,3*/ | ||
| 3944 | 56 | to[nweights - 1] += 0x1000; | |
| 3945 | } | ||
| 3946 | } else { | ||
| 3947 | 2 | loader->errcode = EE_FAILED_TO_RESET_BEFORE_PRIMARY_IGNORABLE_CHAR; | |
| 3948 | 2 | snprintf(loader->errarg, sizeof(loader->errarg), "U+%04lX", r->base[0]); | |
| 3949 | 2 | return true; | |
| 3950 | } | ||
| 3951 | } | ||
| 3952 | } else { | ||
| 3953 | /* Shift to an ignorable character, e.g.: & \u0000 < \u0001 */ | ||
| 3954 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3280 times.
|
3280 | assert(to[0] == 0); |
| 3955 | 3280 | to[0] = r->diff[level]; | |
| 3956 | } | ||
| 3957 | 516025 | return false; | |
| 3958 | } | ||
| 3959 | |||
| 3960 | 267155 | static MY_CONTRACTION *add_contraction_to_trie( | |
| 3961 | std::vector<MY_CONTRACTION> *cont_nodes, MY_COLL_RULE *r) { | ||
| 3962 | 267155 | MY_CONTRACTION new_node{0, {}, {}, {}, false, 0}; | |
| 3963 |
2/2✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 62676 times.
|
267155 | if (r->with_context) // previous-context contraction |
| 3964 | { | ||
| 3965 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 204479 times.
|
204479 | assert(my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION) == 2); |
| 3966 | std::vector<MY_CONTRACTION>::iterator node_it = | ||
| 3967 |
1/2✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
|
204479 | find_contraction_part_in_trie(*cont_nodes, r->curr[1]); |
| 3968 |
6/6✓ Branch 0 taken 203510 times.
✓ Branch 1 taken 969 times.
✓ Branch 2 taken 650 times.
✓ Branch 3 taken 202860 times.
✓ Branch 4 taken 1619 times.
✓ Branch 5 taken 202860 times.
|
204479 | if (node_it == cont_nodes->end() || node_it->ch != r->curr[1]) { |
| 3969 | 1619 | new_node.ch = r->curr[1]; | |
| 3970 |
1/2✓ Branch 0 taken 1619 times.
✗ Branch 1 not taken.
|
1619 | node_it = cont_nodes->insert(node_it, new_node); |
| 3971 | } | ||
| 3972 | 204479 | cont_nodes = &node_it->child_nodes_context; | |
| 3973 | |||
| 3974 |
1/2✓ Branch 0 taken 204479 times.
✗ Branch 1 not taken.
|
204479 | node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[0]); |
| 3975 |
6/6✓ Branch 0 taken 175066 times.
✓ Branch 1 taken 29413 times.
✓ Branch 2 taken 173128 times.
✓ Branch 3 taken 1938 times.
✓ Branch 4 taken 202541 times.
✓ Branch 5 taken 1938 times.
|
204479 | if (node_it == cont_nodes->end() || node_it->ch != r->curr[0]) { |
| 3976 | 202541 | new_node.ch = r->curr[0]; | |
| 3977 |
1/2✓ Branch 0 taken 202541 times.
✗ Branch 1 not taken.
|
202541 | node_it = cont_nodes->insert(node_it, new_node); |
| 3978 | } | ||
| 3979 | 204479 | node_it->is_contraction_tail = true; | |
| 3980 | 204479 | node_it->contraction_len = 2; | |
| 3981 | 204479 | return &(*node_it); | |
| 3982 | } else // normal contraction | ||
| 3983 | { | ||
| 3984 | 62676 | size_t contraction_len = my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION); | |
| 3985 | 62676 | std::vector<MY_CONTRACTION>::iterator node_it; | |
| 3986 |
2/2✓ Branch 0 taken 137833 times.
✓ Branch 1 taken 62676 times.
|
200509 | for (size_t ch_ind = 0; ch_ind < contraction_len; ++ch_ind) { |
| 3987 |
1/2✓ Branch 0 taken 137833 times.
✗ Branch 1 not taken.
|
137833 | node_it = find_contraction_part_in_trie(*cont_nodes, r->curr[ch_ind]); |
| 3988 |
6/6✓ Branch 0 taken 77932 times.
✓ Branch 1 taken 59901 times.
✓ Branch 2 taken 42511 times.
✓ Branch 3 taken 35421 times.
✓ Branch 4 taken 102412 times.
✓ Branch 5 taken 35421 times.
|
137833 | if (node_it == cont_nodes->end() || node_it->ch != r->curr[ch_ind]) { |
| 3989 | 102412 | new_node.ch = r->curr[ch_ind]; | |
| 3990 |
1/2✓ Branch 0 taken 102412 times.
✗ Branch 1 not taken.
|
102412 | node_it = cont_nodes->insert(node_it, new_node); |
| 3991 | } | ||
| 3992 | 137833 | cont_nodes = &node_it->child_nodes; | |
| 3993 | } | ||
| 3994 | 62676 | node_it->is_contraction_tail = true; | |
| 3995 | 62676 | node_it->contraction_len = contraction_len; | |
| 3996 | 62676 | return &(*node_it); | |
| 3997 | } | ||
| 3998 | 267155 | } | |
| 3999 | |||
| 4000 | 1250794 | static bool apply_one_rule(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, | |
| 4001 | MY_COLL_RULES *rules, MY_COLL_RULE *r, int level, | ||
| 4002 | MY_UCA_INFO *dst) { | ||
| 4003 | size_t nweights; | ||
| 4004 | 1250794 | size_t nreset = my_coll_rule_reset_length(r); /* Length of reset sequence */ | |
| 4005 | 1250794 | size_t nshift = my_coll_rule_shift_length(r); /* Length of shift sequence */ | |
| 4006 | uint16 *to, *to_num_ce; | ||
| 4007 | size_t to_stride; | ||
| 4008 | |||
| 4009 |
2/2✓ Branch 0 taken 267155 times.
✓ Branch 1 taken 983639 times.
|
1250794 | if (nshift >= 2) /* Contraction */ |
| 4010 | { | ||
| 4011 | size_t i; | ||
| 4012 | int flag; | ||
| 4013 | /* Add HEAD, MID and TAIL flags for the contraction parts */ | ||
| 4014 | 267155 | my_uca_add_contraction_flag( | |
| 4015 | dst->contraction_flags, r->curr[0], | ||
| 4016 |
2/2✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 62676 times.
|
267155 | r->with_context ? MY_UCA_PREVIOUS_CONTEXT_HEAD : MY_UCA_CNT_HEAD); |
| 4017 |
2/2✓ Branch 0 taken 12481 times.
✓ Branch 1 taken 267155 times.
|
279636 | for (i = 1, flag = MY_UCA_CNT_MID1; i < nshift - 1; i++, flag <<= 1) |
| 4018 | 12481 | my_uca_add_contraction_flag(dst->contraction_flags, r->curr[i], flag); | |
| 4019 | 267155 | my_uca_add_contraction_flag( | |
| 4020 | dst->contraction_flags, r->curr[i], | ||
| 4021 |
2/2✓ Branch 0 taken 204479 times.
✓ Branch 1 taken 62676 times.
|
267155 | r->with_context ? MY_UCA_PREVIOUS_CONTEXT_TAIL : MY_UCA_CNT_TAIL); |
| 4022 | /* Add new contraction to the contraction list */ | ||
| 4023 | MY_CONTRACTION *trie_node = | ||
| 4024 | 267155 | add_contraction_to_trie(dst->contraction_nodes, r); | |
| 4025 | 267155 | to = trie_node->weight; | |
| 4026 | 267155 | to_stride = 1; | |
| 4027 | 267155 | to_num_ce = &to[MY_UCA_MAX_WEIGHT_SIZE - 1]; | |
| 4028 | /* Store weights of the "reset to" character */ | ||
| 4029 | nweights = | ||
| 4030 | 267155 | my_char_weight_put(dst, to, to_stride, MY_UCA_MAX_WEIGHT_SIZE - 1, | |
| 4031 | 267155 | to_num_ce, r, nreset, rules->uca->version); | |
| 4032 | } else { | ||
| 4033 | 983639 | my_wc_t pagec = (r->curr[0] >> 8); | |
| 4034 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 983639 times.
|
983639 | assert(dst->weights[pagec]); |
| 4035 |
3/4✓ Branch 0 taken 983639 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 493240 times.
✓ Branch 3 taken 490399 times.
|
983639 | if (cs->uca && cs->uca->version == UCA_V900) { |
| 4036 | 493240 | to = my_char_weight_addr_900(dst, r->curr[0]); | |
| 4037 | 493240 | to_stride = UCA900_DISTANCE_BETWEEN_LEVELS; | |
| 4038 | 493240 | to_num_ce = to - UCA900_DISTANCE_BETWEEN_LEVELS; | |
| 4039 | } else { | ||
| 4040 | 490399 | to = my_char_weight_addr(dst, r->curr[0]); | |
| 4041 | 490399 | to_stride = 1; | |
| 4042 | 490399 | to_num_ce = to + (dst->lengths[pagec] - 1); | |
| 4043 | } | ||
| 4044 | /* Store weights of the "reset to" character */ | ||
| 4045 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 983639 times.
|
983639 | if (dst->lengths[pagec] == 0) |
| 4046 | ✗ | nweights = 0; | |
| 4047 | else | ||
| 4048 | 983639 | nweights = my_char_weight_put(dst, to, to_stride, dst->lengths[pagec] - 1, | |
| 4049 | 983639 | to_num_ce, r, nreset, rules->uca->version); | |
| 4050 | } | ||
| 4051 | |||
| 4052 | 1250794 | change_weight_if_case_first(cs, dst, r, to, to_stride, nshift, nweights); | |
| 4053 | /* Apply level difference. */ | ||
| 4054 | 1250794 | return apply_shift(loader, rules, r, level, to, to_stride, nweights); | |
| 4055 | } | ||
| 4056 | |||
| 4057 | /** | ||
| 4058 | Check if collation rules are valid, | ||
| 4059 | i.e. characters are not outside of the collation supported range. | ||
| 4060 | */ | ||
| 4061 | 29006 | static int check_rules(MY_CHARSET_LOADER *loader, const MY_COLL_RULES *rules, | |
| 4062 | const MY_UCA_INFO *dst, const MY_UCA_INFO *src) { | ||
| 4063 | const MY_COLL_RULE *r, *rlast; | ||
| 4064 |
2/2✓ Branch 0 taken 1250794 times.
✓ Branch 1 taken 29006 times.
|
1279800 | for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) { |
| 4065 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1250794 times.
|
1250794 | if (r->curr[0] > dst->maxchar) { |
| 4066 | ✗ | loader->errcode = EE_SHIFT_CHAR_OUT_OF_RANGE; | |
| 4067 | ✗ | snprintf(loader->errarg, sizeof(loader->errarg), "u%04X", | |
| 4068 | ✗ | (uint)r->curr[0]); | |
| 4069 | ✗ | return true; | |
| 4070 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1250794 times.
|
1250794 | } else if (r->base[0] > src->maxchar) { |
| 4071 | ✗ | loader->errcode = EE_RESET_CHAR_OUT_OF_RANGE; | |
| 4072 | ✗ | snprintf(loader->errarg, sizeof(loader->errarg), "u%04X", | |
| 4073 | ✗ | (uint)r->base[0]); | |
| 4074 | ✗ | return true; | |
| 4075 | } | ||
| 4076 | } | ||
| 4077 | 29006 | return false; | |
| 4078 | } | ||
| 4079 | |||
| 4080 | 9359 | static void synthesize_lengths_900(uchar *lengths, const uint16 *const *weights, | |
| 4081 | uint npages) { | ||
| 4082 |
2/2✓ Branch 0 taken 40730368 times.
✓ Branch 1 taken 9359 times.
|
40739727 | for (uint page = 0; page < npages; ++page) { |
| 4083 | 40730368 | int max_len = 0; | |
| 4084 |
2/2✓ Branch 0 taken 1396105 times.
✓ Branch 1 taken 39334263 times.
|
40730368 | if (weights[page]) { |
| 4085 |
2/2✓ Branch 0 taken 357402880 times.
✓ Branch 1 taken 1396105 times.
|
358798985 | for (uint code = 0; code < 256; ++code) { |
| 4086 | 357402880 | max_len = std::max<int>(max_len, weights[page][code]); | |
| 4087 | } | ||
| 4088 | } | ||
| 4089 |
2/2✓ Branch 0 taken 39334263 times.
✓ Branch 1 taken 1396105 times.
|
40730368 | if (max_len == 0) |
| 4090 | 39334263 | lengths[page] = 0; | |
| 4091 | else | ||
| 4092 | 1396105 | lengths[page] = max_len * MY_UCA_900_CE_SIZE + 1; | |
| 4093 | } | ||
| 4094 | 9359 | } | |
| 4095 | |||
| 4096 | 28846 | static void copy_ja_han_pages(const CHARSET_INFO *cs, MY_UCA_INFO *dst) { | |
| 4097 |
3/4✓ Branch 0 taken 28846 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9199 times.
✓ Branch 3 taken 19647 times.
|
28846 | if (!cs->uca || cs->uca->version != UCA_V900 || |
| 4098 |
2/2✓ Branch 0 taken 8876 times.
✓ Branch 1 taken 323 times.
|
9199 | cs->coll_param != &ja_coll_param) |
| 4099 | 28523 | return; | |
| 4100 |
2/2✓ Branch 0 taken 26486 times.
✓ Branch 1 taken 323 times.
|
26809 | for (int page = MIN_JA_HAN_PAGE; page <= MAX_JA_HAN_PAGE; page++) { |
| 4101 | // In DUCET, weight is not assigned to code points in [U+4E00, U+9FFF]. | ||
| 4102 | // When re-initializing (after my_coll_uninit_uca), the weights | ||
| 4103 | // may already be set. | ||
| 4104 |
3/4✓ Branch 0 taken 738 times.
✓ Branch 1 taken 25748 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 738 times.
|
26486 | assert(dst->weights[page] == nullptr || |
| 4105 | dst->weights[page] == ja_han_pages[page - MIN_JA_HAN_PAGE]); | ||
| 4106 | 26486 | dst->weights[page] = ja_han_pages[page - MIN_JA_HAN_PAGE]; | |
| 4107 | } | ||
| 4108 | } | ||
| 4109 | |||
| 4110 | /* | ||
| 4111 | We have reordered all the characters in the pages which contains Chinese Han | ||
| 4112 | characters with uca9dump (see dump_zh_pages() in uca9-dump.cc). Replace the | ||
| 4113 | DUCET pages with these pages. | ||
| 4114 | */ | ||
| 4115 | 160 | static void copy_zh_han_pages(MY_UCA_INFO *dst) { | |
| 4116 |
2/2✓ Branch 0 taken 107680 times.
✓ Branch 1 taken 160 times.
|
107840 | for (int page = MIN_ZH_HAN_PAGE; page <= MAX_ZH_HAN_PAGE; page++) { |
| 4117 |
2/2✓ Branch 0 taken 47200 times.
✓ Branch 1 taken 60480 times.
|
107680 | if (zh_han_pages[page - MIN_ZH_HAN_PAGE]) { |
| 4118 | 47200 | dst->weights[page] = zh_han_pages[page - MIN_ZH_HAN_PAGE]; | |
| 4119 | } | ||
| 4120 | } | ||
| 4121 | 160 | } | |
| 4122 | |||
| 4123 | /* | ||
| 4124 | UCA defines an algorithm to calculate character's implicit weight if this | ||
| 4125 | character's weight is not defined in the DUCET. This function is to help | ||
| 4126 | convert Chinese character's implicit weight calculated by UCA back to its code | ||
| 4127 | points. | ||
| 4128 | The implicit weight and the code point is not 1 : 1 map because DUCET lets | ||
| 4129 | some characters share implicit primary weight. For example, the DUCET defines | ||
| 4130 | "2F00 ; [.FB40.0020.0004][.CE00.0000.0000] # KANGXI RADICAL ONE", and 4E00's | ||
| 4131 | implicit weight is [.FB40.0020.0002][.CE00.0000.0000]. We can see the primary | ||
| 4132 | weights of U+2F00 and U+4E00 are same (FB40 CE00). | ||
| 4133 | |||
| 4134 | But for the Han characters in zh.xml file, each one has unique implicit | ||
| 4135 | weight. | ||
| 4136 | */ | ||
| 4137 | 498638 | static inline my_wc_t convert_implicit_to_ch(uint16 first, uint16 second) { | |
| 4138 | /* | ||
| 4139 | For reference, here is how UCA calculates one character's implicit weight. | ||
| 4140 | AAAA = 0xFB40 + (CP >> 15) # The 0xFB40 changes for different character | ||
| 4141 | # groups | ||
| 4142 | BBBB = (CP & 0x7FFF) | 0x8000 | ||
| 4143 | */ | ||
| 4144 |
2/2✓ Branch 0 taken 201280 times.
✓ Branch 1 taken 297358 times.
|
498638 | if (first < 0xFB80) |
| 4145 | 201280 | return (((first - 0xFB40) << 15) | (second & 0x7FFF)); | |
| 4146 |
2/2✓ Branch 0 taken 28574 times.
✓ Branch 1 taken 268784 times.
|
297358 | else if (first < 0xFBC0) |
| 4147 | 28574 | return (((first - 0xFB80) << 15) | (second & 0x7FFF)); | |
| 4148 | else | ||
| 4149 | 268784 | return (((first - 0xFBC0) << 15) | (second & 0x7FFF)); | |
| 4150 | } | ||
| 4151 | |||
| 4152 | /* | ||
| 4153 | Usually we do reordering in apply_reorder_param(). But for the Chinese | ||
| 4154 | collation, since we want to remove the weight gap between the character groups | ||
| 4155 | (see the comment on change_zh_implicit()), and we have done the reordering for | ||
| 4156 | some characters in the pages which contains Chinese Han characters, if we | ||
| 4157 | still use apply_reorder_param() to do the reordering for other characters, we | ||
| 4158 | might meet weight conflict. For example, in the DUCET page, 'A' has primary | ||
| 4159 | weight 0x1C47, but this value has been assigned to the first Chinese Han | ||
| 4160 | character in CLDR zh.xml file. | ||
| 4161 | So we do the reordering for all the DUCET pages when initializing the | ||
| 4162 | collation. | ||
| 4163 | */ | ||
| 4164 | 160 | static void modify_all_zh_pages(Reorder_param *reorder_param, MY_UCA_INFO *dst, | |
| 4165 | int npages) { | ||
| 4166 | 160 | std::map<int, int> zh_han_to_single_weight_map; | |
| 4167 |
2/2✓ Branch 0 taken 6613760 times.
✓ Branch 1 taken 160 times.
|
6613920 | for (int i = 0; i < ZH_HAN_WEIGHT_PAIRS; i++) { |
| 4168 | 6613760 | zh_han_to_single_weight_map[zh_han_to_single_weight[i * 2]] = | |
| 4169 |
1/2✓ Branch 0 taken 6613760 times.
✗ Branch 1 not taken.
|
6613760 | zh_han_to_single_weight[i * 2 + 1]; |
| 4170 | } | ||
| 4171 | |||
| 4172 |
2/2✓ Branch 0 taken 696320 times.
✓ Branch 1 taken 160 times.
|
696480 | for (int page = 0; page < npages; page++) { |
| 4173 | /* | ||
| 4174 | If there is no page in the DUCET, then all the characters in this page | ||
| 4175 | must have implicit weight. The reordering for it will be done by | ||
| 4176 | change_zh_implicit(). Do not need to change here. | ||
| 4177 | If there is page in zh_han_pages[], then all the characters in this page | ||
| 4178 | have been reordered by uca9dump. Do not need to change here. | ||
| 4179 | */ | ||
| 4180 |
2/2✓ Branch 0 taken 24716 times.
✓ Branch 1 taken 671604 times.
|
696320 | if (!dst->weights[page] || |
| 4181 |
4/4✓ Branch 0 taken 17356 times.
✓ Branch 1 taken 7360 times.
✓ Branch 2 taken 16556 times.
✓ Branch 3 taken 800 times.
|
24716 | (page >= MIN_ZH_HAN_PAGE && page <= MAX_ZH_HAN_PAGE && |
| 4182 |
2/2✓ Branch 0 taken 1356 times.
✓ Branch 1 taken 15200 times.
|
16556 | zh_han_pages[page - MIN_ZH_HAN_PAGE])) |
| 4183 | 672960 | continue; | |
| 4184 |
2/2✓ Branch 0 taken 5980160 times.
✓ Branch 1 taken 23360 times.
|
6003520 | for (int off = 0; off < 256; off++) { |
| 4185 | 5980160 | uint16 *wbeg = UCA900_WEIGHT_ADDR(dst->weights[page], 0, off); | |
| 4186 | 5980160 | int num_of_ce = UCA900_NUM_OF_CE(dst->weights[page], off); | |
| 4187 |
2/2✓ Branch 0 taken 6612156 times.
✓ Branch 1 taken 5980160 times.
|
12592316 | for (int ce = 0; ce < num_of_ce; ce++) { |
| 4188 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6612156 times.
|
6612156 | assert(reorder_param->wt_rec_num == 1); |
| 4189 |
2/2✓ Branch 0 taken 4781389 times.
✓ Branch 1 taken 1830767 times.
|
6612156 | if (*wbeg >= reorder_param->wt_rec[0].old_wt_bdy.begin && |
| 4190 |
2/2✓ Branch 0 taken 3200440 times.
✓ Branch 1 taken 1580949 times.
|
4781389 | *wbeg <= reorder_param->wt_rec[0].old_wt_bdy.end) { |
| 4191 | 3200440 | *wbeg = *wbeg + reorder_param->wt_rec[0].new_wt_bdy.begin - | |
| 4192 | 3200440 | reorder_param->wt_rec[0].old_wt_bdy.begin; | |
| 4193 |
2/2✓ Branch 0 taken 1469469 times.
✓ Branch 1 taken 1942247 times.
|
3411716 | } else if (*wbeg >= 0xFB00) { |
| 4194 | 1469469 | uint16 next_wt = *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS); | |
| 4195 |
4/4✓ Branch 0 taken 1469409 times.
✓ Branch 1 taken 60 times.
✓ Branch 2 taken 498638 times.
✓ Branch 3 taken 970771 times.
|
1469469 | if (*wbeg >= 0xFB40 && *wbeg <= 0xFBC1) { // Han's implicit weight |
| 4196 | /* | ||
| 4197 | If some characters in DUCET share the same implicit weight, their | ||
| 4198 | reordered weight should be same too. | ||
| 4199 | */ | ||
| 4200 | 498638 | my_wc_t ch = convert_implicit_to_ch(*wbeg, next_wt); | |
| 4201 |
1/2✓ Branch 0 taken 498638 times.
✗ Branch 1 not taken.
|
498638 | if (zh_han_to_single_weight_map.find(ch) != |
| 4202 |
2/2✓ Branch 0 taken 215721 times.
✓ Branch 1 taken 282917 times.
|
997276 | zh_han_to_single_weight_map.end()) { |
| 4203 |
1/2✓ Branch 0 taken 215721 times.
✗ Branch 1 not taken.
|
215721 | *wbeg = zh_han_to_single_weight_map[ch]; |
| 4204 | 215721 | *(wbeg + UCA900_DISTANCE_BETWEEN_WEIGHTS) = 0; | |
| 4205 | 215721 | wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 4206 | 215721 | ce++; | |
| 4207 | 215721 | continue; | |
| 4208 | } | ||
| 4209 | } | ||
| 4210 | 1253748 | *wbeg = change_zh_implicit(*wbeg); | |
| 4211 | 1253748 | wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 4212 | 1253748 | ce++; | |
| 4213 | } | ||
| 4214 | 6396435 | wbeg += UCA900_DISTANCE_BETWEEN_WEIGHTS; | |
| 4215 | } | ||
| 4216 | } | ||
| 4217 | } | ||
| 4218 | 160 | } | |
| 4219 | |||
| 4220 | 29006 | static bool init_weight_level(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, | |
| 4221 | MY_COLL_RULES *rules, int level, MY_UCA_INFO *dst, | ||
| 4222 | const MY_UCA_INFO *src, | ||
| 4223 | bool lengths_are_temporary) { | ||
| 4224 | MY_COLL_RULE *r, *rlast; | ||
| 4225 | 29006 | size_t i, npages = (src->maxchar + 1) / 256; | |
| 4226 | 29006 | bool has_contractions = false; | |
| 4227 | |||
| 4228 | 29006 | dst->maxchar = src->maxchar; | |
| 4229 | |||
| 4230 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 29006 times.
|
29006 | if (check_rules(loader, rules, dst, src)) return true; |
| 4231 | |||
| 4232 | /* Allocate memory for pages and their lengths */ | ||
| 4233 |
2/2✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19647 times.
|
29006 | if (lengths_are_temporary) { |
| 4234 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9359 times.
|
9359 | if (!(dst->lengths = (uchar *)(loader->mem_malloc)(npages))) return true; |
| 4235 | 9359 | if (!(dst->weights = | |
| 4236 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9359 times.
|
9359 | (uint16 **)(loader->once_alloc)(npages * sizeof(uint16 *)))) { |
| 4237 | ✗ | (loader->mem_free)(dst->lengths); | |
| 4238 | ✗ | return true; | |
| 4239 | } | ||
| 4240 | } else { | ||
| 4241 |
2/4✓ Branch 0 taken 19647 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 19647 times.
|
39294 | if (!(dst->lengths = (uchar *)(loader->once_alloc)(npages)) || |
| 4242 | 19647 | !(dst->weights = | |
| 4243 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 19647 times.
|
19647 | (uint16 **)(loader->once_alloc)(npages * sizeof(uint16 *)))) |
| 4244 | ✗ | return true; | |
| 4245 | } | ||
| 4246 | |||
| 4247 | /* | ||
| 4248 | Copy pages lengths and page pointers from the default UCA weights. | ||
| 4249 | */ | ||
| 4250 | 29006 | memcpy(dst->lengths, src->lengths, npages); | |
| 4251 | 29006 | memcpy(dst->weights, src->weights, npages * sizeof(uint16 *)); | |
| 4252 | |||
| 4253 | /* | ||
| 4254 | Calculate maximum lengths for the pages which will be overwritten. | ||
| 4255 | Mark pages that will be overwritten as NULL. | ||
| 4256 | We'll allocate their own memory. | ||
| 4257 | */ | ||
| 4258 |
2/2✓ Branch 0 taken 1250794 times.
✓ Branch 1 taken 29006 times.
|
1279800 | for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) { |
| 4259 |
2/2✓ Branch 0 taken 983639 times.
✓ Branch 1 taken 267155 times.
|
1250794 | if (!r->curr[1]) /* If not a contraction */ |
| 4260 | { | ||
| 4261 | 983639 | uint pagec = (r->curr[0] >> 8); | |
| 4262 |
2/2✓ Branch 0 taken 218773 times.
✓ Branch 1 taken 764866 times.
|
983639 | if (r->base[1]) /* Expansion */ |
| 4263 | { | ||
| 4264 | /* Reserve space for maximum possible length */ | ||
| 4265 | 218773 | dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE; | |
| 4266 | } else { | ||
| 4267 | 764866 | uint pageb = (r->base[0] >> 8); | |
| 4268 |
6/6✓ Branch 0 taken 251626 times.
✓ Branch 1 taken 513240 times.
✓ Branch 2 taken 162963 times.
✓ Branch 3 taken 88663 times.
✓ Branch 4 taken 53292 times.
✓ Branch 5 taken 109671 times.
|
764866 | if ((r->diff[0] || r->diff[1] || r->diff[2]) && |
| 4269 |
2/2✓ Branch 0 taken 37200 times.
✓ Branch 1 taken 617995 times.
|
655195 | dst->lengths[pagec] < (src->lengths[pageb] + 3)) { |
| 4270 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 37200 times.
|
37200 | if ((src->lengths[pageb] + 3) > MY_UCA_MAX_WEIGHT_SIZE) |
| 4271 | ✗ | dst->lengths[pagec] = MY_UCA_MAX_WEIGHT_SIZE; | |
| 4272 | else | ||
| 4273 | 37200 | dst->lengths[pagec] = src->lengths[pageb] + 3; | |
| 4274 |
2/2✓ Branch 0 taken 648 times.
✓ Branch 1 taken 727018 times.
|
727666 | } else if (dst->lengths[pagec] < src->lengths[pageb]) |
| 4275 | 648 | dst->lengths[pagec] = src->lengths[pageb]; | |
| 4276 | } | ||
| 4277 | 983639 | dst->weights[pagec] = nullptr; /* Mark that we'll overwrite this page */ | |
| 4278 | } else | ||
| 4279 | 267155 | has_contractions = true; | |
| 4280 | } | ||
| 4281 | |||
| 4282 |
2/2✓ Branch 0 taken 8844 times.
✓ Branch 1 taken 20162 times.
|
29006 | if (has_contractions) { |
| 4283 | 8844 | dst->have_contractions = true; | |
| 4284 |
2/4✓ Branch 0 taken 8844 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8844 times.
✗ Branch 3 not taken.
|
8844 | dst->contraction_nodes = new std::vector<MY_CONTRACTION>(0); |
| 4285 | 8844 | if (!(dst->contraction_flags = | |
| 4286 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8844 times.
|
8844 | (char *)(loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE))) |
| 4287 | ✗ | return true; | |
| 4288 | 8844 | memset(dst->contraction_flags, 0, MY_UCA_CNT_FLAG_SIZE); | |
| 4289 | } | ||
| 4290 |
2/2✓ Branch 0 taken 160 times.
✓ Branch 1 taken 28846 times.
|
29006 | if (cs->coll_param == &zh_coll_param) { |
| 4291 | /* | ||
| 4292 | We are going to reorder the weight of characters in uca pages when | ||
| 4293 | initializing this collation. And because of the reorder rule [reorder | ||
| 4294 | Hani], we need to change almost every character's weight. So copy all | ||
| 4295 | the pages. | ||
| 4296 | Please also see the comment on modify_all_zh_pages(). | ||
| 4297 | */ | ||
| 4298 | bool rc; | ||
| 4299 |
2/2✓ Branch 0 taken 696320 times.
✓ Branch 1 taken 160 times.
|
696480 | for (i = 0; i < npages; i++) { |
| 4300 |
4/6✓ Branch 0 taken 24716 times.
✓ Branch 1 taken 671604 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 24716 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 696320 times.
|
696320 | if (dst->lengths[i] && (rc = my_uca_copy_page(cs, loader, src, dst, i))) |
| 4301 | ✗ | return rc; | |
| 4302 | } | ||
| 4303 | 160 | modify_all_zh_pages(cs->coll_param->reorder_param, dst, npages); | |
| 4304 | 160 | copy_zh_han_pages(dst); | |
| 4305 | } else { | ||
| 4306 | /* Allocate pages that we'll overwrite and copy default weights */ | ||
| 4307 |
2/2✓ Branch 0 taken 49044992 times.
✓ Branch 1 taken 28846 times.
|
49073838 | for (i = 0; i < npages; i++) { |
| 4308 | bool rc; | ||
| 4309 | /* | ||
| 4310 | Don't touch pages with lengths[i]==0, they have implicit weights | ||
| 4311 | calculated algorithmically. | ||
| 4312 | */ | ||
| 4313 |
5/6✓ Branch 0 taken 46520142 times.
✓ Branch 1 taken 2524850 times.
✓ Branch 2 taken 56590 times.
✓ Branch 3 taken 46463552 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 49044992 times.
|
49101582 | if (!dst->weights[i] && dst->lengths[i] && |
| 4314 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 56590 times.
|
56590 | (rc = my_uca_copy_page(cs, loader, src, dst, i))) |
| 4315 | ✗ | return rc; | |
| 4316 | } | ||
| 4317 | |||
| 4318 | 28846 | copy_ja_han_pages(cs, dst); | |
| 4319 | } | ||
| 4320 | |||
| 4321 | /* | ||
| 4322 | Preparatory step is done at this point. | ||
| 4323 | Now we have memory allocated for the pages that we'll overwrite, | ||
| 4324 | and for contractions, including previous context contractions. | ||
| 4325 | Also, for the pages that we'll overwrite, we have copied default weights. | ||
| 4326 | Now iterate through the rules, overwrite weights for the characters | ||
| 4327 | that appear in the rules, and put all contractions into contraction list. | ||
| 4328 | */ | ||
| 4329 |
2/2✓ Branch 0 taken 1250794 times.
✓ Branch 1 taken 29004 times.
|
1279798 | for (r = rules->rule; r < rlast; r++) { |
| 4330 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1250792 times.
|
1250794 | if (apply_one_rule(cs, loader, rules, r, level, dst)) return true; |
| 4331 | } | ||
| 4332 | 29004 | return false; | |
| 4333 | } | ||
| 4334 | |||
| 4335 | /** | ||
| 4336 | Check whether the composition character is already in rule list | ||
| 4337 | @param rules The rule list | ||
| 4338 | @param wc The composition character | ||
| 4339 | @return true The composition character is already in list | ||
| 4340 | false The composition character is not in list | ||
| 4341 | */ | ||
| 4342 | 578607 | static bool my_comp_in_rulelist(const MY_COLL_RULES *rules, my_wc_t wc) { | |
| 4343 | MY_COLL_RULE *r, *rlast; | ||
| 4344 |
2/2✓ Branch 0 taken 100543160 times.
✓ Branch 1 taken 468377 times.
|
101011537 | for (r = rules->rule, rlast = rules->rule + rules->nrules; r < rlast; r++) { |
| 4345 |
4/4✓ Branch 0 taken 138654 times.
✓ Branch 1 taken 100404506 times.
✓ Branch 2 taken 110230 times.
✓ Branch 3 taken 28424 times.
|
100543160 | if (r->curr[0] == wc && r->curr[1] == 0) return true; |
| 4346 | } | ||
| 4347 | 468377 | return false; | |
| 4348 | } | ||
| 4349 | |||
| 4350 | /** | ||
| 4351 | Check whether a composition character in the decomposition list is a | ||
| 4352 | normal character. | ||
| 4353 | @param dec_ind The index of composition character in list | ||
| 4354 | @return Whether it is a normal character | ||
| 4355 | */ | ||
| 4356 | 1761683638 | static inline bool my_compchar_is_normal_char(uint dec_ind) { | |
| 4357 | 1761683638 | return uni_dec[dec_ind].decomp_tag == DECOMP_TAG_NONE; | |
| 4358 | } | ||
| 4359 | |||
| 4360 | 904076 | static inline bool my_compchar_is_normal_char(const Unidata_decomp *decomp) { | |
| 4361 | 904076 | return my_compchar_is_normal_char(decomp - std::begin(uni_dec)); | |
| 4362 | } | ||
| 4363 | |||
| 4364 | 307721 | static Unidata_decomp *get_decomposition(my_wc_t ch) { | |
| 4365 | 3883031 | auto comp_func = [](Unidata_decomp x, Unidata_decomp y) { | |
| 4366 | 3883031 | return x.charcode < y.charcode; | |
| 4367 | }; | ||
| 4368 | 307721 | Unidata_decomp to_find = {ch, CHAR_CATEGORY_LU, DECOMP_TAG_NONE, {0}}; | |
| 4369 |
1/2✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
|
307721 | Unidata_decomp *decomp = std::lower_bound( |
| 4370 | std::begin(uni_dec), std::end(uni_dec), to_find, comp_func); | ||
| 4371 |
5/6✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 49383 times.
✓ Branch 3 taken 258338 times.
✓ Branch 4 taken 49383 times.
✓ Branch 5 taken 258338 times.
|
307721 | if (decomp == std::end(uni_dec) || decomp->charcode != ch) return nullptr; |
| 4372 | 258338 | return decomp; | |
| 4373 | } | ||
| 4374 | |||
| 4375 | 3645724 | static Combining_mark *my_find_combining_mark(my_wc_t code) { | |
| 4376 | 35679820 | auto comp_func = [](Combining_mark x, Combining_mark y) { | |
| 4377 | 35679820 | return x.charcode < y.charcode; | |
| 4378 | }; | ||
| 4379 | 3645724 | Combining_mark to_find = {code, 0}; | |
| 4380 |
1/2✓ Branch 0 taken 3645724 times.
✗ Branch 1 not taken.
|
3645724 | return std::lower_bound(std::begin(combining_marks), |
| 4381 | 7291448 | std::end(combining_marks), to_find, comp_func); | |
| 4382 | } | ||
| 4383 | |||
| 4384 | /** | ||
| 4385 | Check if a list of combining marks contains the whole list of origin | ||
| 4386 | decomposed combining marks. | ||
| 4387 | @param origin_dec The origin list of combining marks decomposed from | ||
| 4388 | character in tailoring rule. | ||
| 4389 | @param dec_codes The list of combining marks decomposed from | ||
| 4390 | character in decomposition list. | ||
| 4391 | @param dec_diff The combining marks exist in dec_codes but not in | ||
| 4392 | origin_dec. | ||
| 4393 | @return Whether the list of combining marks contains the | ||
| 4394 | whole list of origin combining marks. | ||
| 4395 | */ | ||
| 4396 | 284165408 | static bool my_is_inheritance_of_origin(const my_wc_t *origin_dec, | |
| 4397 | const my_wc_t *dec_codes, | ||
| 4398 | my_wc_t *dec_diff) { | ||
| 4399 | int ind0, ind1, ind2; | ||
| 4400 |
2/2✓ Branch 0 taken 282344219 times.
✓ Branch 1 taken 1821189 times.
|
284165408 | if (origin_dec[0] != dec_codes[0]) return false; |
| 4401 |
1/2✓ Branch 0 taken 2710035 times.
✗ Branch 1 not taken.
|
4531224 | for (ind0 = ind1 = ind2 = 1; ind0 < MY_UCA_MAX_CONTRACTION && |
| 4402 | 2710035 | ind1 < MY_UCA_MAX_CONTRACTION && | |
| 4403 |
5/6✓ Branch 0 taken 2710035 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2456908 times.
✓ Branch 3 taken 253127 times.
✓ Branch 4 taken 2005938 times.
✓ Branch 5 taken 450970 times.
|
5420070 | origin_dec[ind0] && dec_codes[ind1];) { |
| 4404 |
2/2✓ Branch 0 taken 183076 times.
✓ Branch 1 taken 1822862 times.
|
2005938 | if (origin_dec[ind0] == dec_codes[ind1]) { |
| 4405 | 183076 | ind0++; | |
| 4406 | 183076 | ind1++; | |
| 4407 | } else { | ||
| 4408 | 1822862 | Combining_mark *mark0 = my_find_combining_mark(origin_dec[ind0]); | |
| 4409 | 1822862 | Combining_mark *mark1 = my_find_combining_mark(dec_codes[ind1]); | |
| 4410 |
2/2✓ Branch 0 taken 1117092 times.
✓ Branch 1 taken 705770 times.
|
1822862 | if (mark0->ccc == mark1->ccc) return false; |
| 4411 | 705770 | dec_diff[ind2++] = dec_codes[ind1++]; | |
| 4412 | } | ||
| 4413 | } | ||
| 4414 |
3/4✓ Branch 0 taken 704097 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 253127 times.
✓ Branch 3 taken 450970 times.
|
704097 | if (ind0 >= MY_UCA_MAX_CONTRACTION || !origin_dec[ind0]) { |
| 4415 |
2/2✓ Branch 0 taken 1075303 times.
✓ Branch 1 taken 253127 times.
|
1328430 | while (ind1 < MY_UCA_MAX_CONTRACTION) { |
| 4416 | 1075303 | dec_diff[ind2++] = dec_codes[ind1++]; | |
| 4417 | } | ||
| 4418 | 253127 | return true; | |
| 4419 | } | ||
| 4420 | 450970 | return false; | |
| 4421 | } | ||
| 4422 | |||
| 4423 | /** | ||
| 4424 | Add new rules recursively if one rule's characters are in decomposition | ||
| 4425 | list. | ||
| 4426 | @param rules The rule list | ||
| 4427 | @param r The rule to check | ||
| 4428 | @param decomp_rec The decomposition of the character in rule. | ||
| 4429 | @param comp_added Bitset which marks whether the comp | ||
| 4430 | character has been added to rule list. | ||
| 4431 | @return 1 Error adding new rules | ||
| 4432 | 0 Add rules successfully | ||
| 4433 | */ | ||
| 4434 | 307721 | static int my_coll_add_inherit_rules( | |
| 4435 | MY_COLL_RULES *rules, MY_COLL_RULE *r, const Unidata_decomp *decomp_rec, | ||
| 4436 | std::bitset<array_elements(uni_dec)> *comp_added) { | ||
| 4437 |
2/2✓ Branch 0 taken 1760779562 times.
✓ Branch 1 taken 307721 times.
|
1761087283 | for (uint dec_ind = 0; dec_ind < array_elements(uni_dec); dec_ind++) { |
| 4438 | /* | ||
| 4439 | For normal character which can be decomposed, it is always decomposed to | ||
| 4440 | be another character and one combining mark. | ||
| 4441 | |||
| 4442 | Currently we only support the weight inheritance of character that can be | ||
| 4443 | canonical-decomposed to another character and a list of combining marks. | ||
| 4444 | So skip the compatibility decomposition. | ||
| 4445 | |||
| 4446 | Sample from UnicodeData.txt: | ||
| 4447 | Canonical decomposition: U+00DC : U+0055 U+0308 | ||
| 4448 | Compatibility decomposition: U+FF59 : <wide> U+0079 | ||
| 4449 | */ | ||
| 4450 |
9/10✓ Branch 0 taken 633905260 times.
✓ Branch 1 taken 1126874302 times.
✓ Branch 2 taken 633905260 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 624302842 times.
✓ Branch 5 taken 9602418 times.
✓ Branch 6 taken 523209339 times.
✓ Branch 7 taken 101093503 times.
✓ Branch 8 taken 1476614154 times.
✓ Branch 9 taken 284165408 times.
|
2283988901 | if (!my_compchar_is_normal_char(dec_ind) || comp_added->test(dec_ind) || |
| 4451 | 523209339 | (decomp_rec != nullptr && | |
| 4452 |
2/2✓ Branch 0 taken 340137434 times.
✓ Branch 1 taken 183071905 times.
|
523209339 | uni_dec[dec_ind].decomp_tag != decomp_rec->decomp_tag)) |
| 4453 | 1476614154 | continue; | |
| 4454 | /* | ||
| 4455 | In DUCET, all accented character's weight is defined as base | ||
| 4456 | character's weight followed by accent mark's weight. For example: | ||
| 4457 | 00DC = 0055 + 0308 | ||
| 4458 | 0055 ; [.1E30.0020.0008] # LATIN CAPITAL LETTER U | ||
| 4459 | 0308 ; [.0000.002B.0002] # COMBINING DIAERESIS | ||
| 4460 | 00DC ; [.1E30.0020.0008][.0000.002B.0002] # LATIN CAPITAL LETTER U | ||
| 4461 | WITH DIAERESIS | ||
| 4462 | So the composition character's rule should be the same as the origin rule | ||
| 4463 | except for the change of curr value. | ||
| 4464 | */ | ||
| 4465 | 284165408 | my_wc_t dec_diff[MY_UCA_MAX_CONTRACTION]{r->curr[0], 0}; | |
| 4466 | 284165408 | my_wc_t orig_dec[MY_UCA_MAX_CONTRACTION]{0}; | |
| 4467 |
2/2✓ Branch 0 taken 101093503 times.
✓ Branch 1 taken 183071905 times.
|
284165408 | if (decomp_rec == nullptr) { |
| 4468 | /* | ||
| 4469 | If there is no decomposition record found in Unidata_decomp, it means | ||
| 4470 | its decomposition form is itself. | ||
| 4471 | */ | ||
| 4472 | 101093503 | orig_dec[0] = r->curr[0]; | |
| 4473 | } else { | ||
| 4474 | 183071905 | memcpy(orig_dec, decomp_rec->dec_codes, sizeof(orig_dec)); | |
| 4475 | } | ||
| 4476 |
1/2✓ Branch 0 taken 284165408 times.
✗ Branch 1 not taken.
|
284165408 | if (my_is_inheritance_of_origin(orig_dec, uni_dec[dec_ind].dec_codes, |
| 4477 |
4/4✓ Branch 0 taken 253127 times.
✓ Branch 1 taken 283912281 times.
✓ Branch 2 taken 154431 times.
✓ Branch 3 taken 284010977 times.
|
284418535 | dec_diff) && |
| 4478 |
2/2✓ Branch 0 taken 154431 times.
✓ Branch 1 taken 98696 times.
|
253127 | !my_comp_in_rulelist(rules, uni_dec[dec_ind].charcode)) { |
| 4479 | 154431 | MY_COLL_RULE newrule{{0}, {uni_dec[dec_ind].charcode, 0}, {0}, 0, false}; | |
| 4480 | 154431 | memcpy(newrule.base, dec_diff, sizeof(newrule.base)); | |
| 4481 |
2/4✓ Branch 0 taken 154431 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 154431 times.
|
154431 | if (my_coll_rules_add(rules, &newrule)) return 1; |
| 4482 |
1/2✓ Branch 0 taken 154431 times.
✗ Branch 1 not taken.
|
154431 | comp_added->set(dec_ind); |
| 4483 | } | ||
| 4484 | } | ||
| 4485 | 307721 | return 0; | |
| 4486 | } | ||
| 4487 | |||
| 4488 | 313946 | static bool combining_mark_in_rulelist(const my_wc_t *dec_codes, | |
| 4489 | const MY_COLL_RULE *r_start, | ||
| 4490 | const MY_COLL_RULE *r_end) { | ||
| 4491 |
1/2✓ Branch 0 taken 466416 times.
✗ Branch 1 not taken.
|
466416 | for (int i = 1; i < MY_UCA_MAX_CONTRACTION; ++i) { |
| 4492 |
2/2✓ Branch 0 taken 279818 times.
✓ Branch 1 taken 186598 times.
|
466416 | if (!*(dec_codes + i)) return false; |
| 4493 |
2/2✓ Branch 0 taken 12146882 times.
✓ Branch 1 taken 152470 times.
|
12299352 | for (const MY_COLL_RULE *r = r_start; r < r_end; ++r) { |
| 4494 |
2/2✓ Branch 0 taken 34128 times.
✓ Branch 1 taken 12112754 times.
|
12146882 | if (r->curr[0] == *(dec_codes + i)) { |
| 4495 | 34128 | return true; | |
| 4496 | } | ||
| 4497 | } | ||
| 4498 | } | ||
| 4499 | ✗ | return false; | |
| 4500 | } | ||
| 4501 | |||
| 4502 | 3023 | static int add_normalization_rules(const CHARSET_INFO *cs, | |
| 4503 | MY_COLL_RULES *rules) { | ||
| 4504 |
3/4✓ Branch 0 taken 3023 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2865 times.
✓ Branch 3 taken 158 times.
|
3023 | if (!cs->coll_param || !cs->coll_param->norm_enabled) return 0; |
| 4505 | 158 | const int orig_rule_num = rules->nrules; | |
| 4506 |
2/2✓ Branch 0 taken 904076 times.
✓ Branch 1 taken 158 times.
|
904234 | for (Unidata_decomp *decomp = std::begin(uni_dec); decomp < std::end(uni_dec); |
| 4507 | ++decomp) { | ||
| 4508 |
2/2✓ Branch 0 taken 313946 times.
✓ Branch 1 taken 11534 times.
|
1229556 | if (!my_compchar_is_normal_char(decomp) || |
| 4509 |
4/4✓ Branch 0 taken 325480 times.
✓ Branch 1 taken 578596 times.
✓ Branch 2 taken 869948 times.
✓ Branch 3 taken 34128 times.
|
1229556 | my_comp_in_rulelist(rules, decomp->charcode) || |
| 4510 |
2/2✓ Branch 0 taken 279818 times.
✓ Branch 1 taken 34128 times.
|
313946 | !combining_mark_in_rulelist(decomp->dec_codes, rules->rule, |
| 4511 | 313946 | rules->rule + orig_rule_num)) | |
| 4512 | 869948 | continue; | |
| 4513 | 34128 | MY_COLL_RULE newrule{{0}, {decomp->charcode, 0}, {0}, 0, false}; | |
| 4514 | 34128 | memcpy(newrule.base, decomp->dec_codes, sizeof(newrule.base)); | |
| 4515 |
2/4✓ Branch 0 taken 34128 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 34128 times.
|
34128 | if (my_coll_rules_add(rules, &newrule)) return 1; |
| 4516 | } | ||
| 4517 | 158 | return 0; | |
| 4518 | } | ||
| 4519 | |||
| 4520 | /** | ||
| 4521 | For every rule in rule list, check and add new rules if it is in | ||
| 4522 | decomposition list. | ||
| 4523 | @param cs Character set info | ||
| 4524 | @param rules The rule list | ||
| 4525 | @return 1 Error happens when adding new rule | ||
| 4526 | 0 Add rules successfully | ||
| 4527 | */ | ||
| 4528 | 29006 | static int my_coll_check_rule_and_inherit(const CHARSET_INFO *cs, | |
| 4529 | MY_COLL_RULES *rules) { | ||
| 4530 |
2/2✓ Branch 0 taken 19647 times.
✓ Branch 1 taken 9359 times.
|
29006 | if (rules->uca->version != UCA_V900) return 0; |
| 4531 | |||
| 4532 | /* | ||
| 4533 | Character can combine with marks to be a new character. For example, | ||
| 4534 | A + [mark b] = A1, A1 + [mark c] = A2. We think the weight of A1 and | ||
| 4535 | A2 should shift with A if A is in rule list and its weight shifts, | ||
| 4536 | unless A1 / A2 is already in rule list. | ||
| 4537 | */ | ||
| 4538 | 9359 | std::bitset<array_elements(uni_dec)> comp_added; | |
| 4539 | 9359 | int orig_rule_num = rules->nrules; | |
| 4540 |
2/2✓ Branch 0 taken 546208 times.
✓ Branch 1 taken 9359 times.
|
555567 | for (int i = 0; i < orig_rule_num; ++i) { |
| 4541 | 546208 | MY_COLL_RULE r = *(rules->rule + i); | |
| 4542 | /* | ||
| 4543 | Do not add inheritance rule for contraction. | ||
| 4544 | But for the Chinese collation, the weight shift rule of Chinese collation | ||
| 4545 | is a bit different from all the languages we added so far. For example, it | ||
| 4546 | has a rule "&e << ... << e\\u0302\\u0300". So far, if a language's rule | ||
| 4547 | involves 'e\\u0302\\u0300', it will use the combining form character, | ||
| 4548 | U+1EC1, and it is not a contraction. If we don't handle this for Chinese | ||
| 4549 | collation, it will skip some further rule inheriting. | ||
| 4550 | */ | ||
| 4551 |
4/4✓ Branch 0 taken 462048 times.
✓ Branch 1 taken 84160 times.
✓ Branch 2 taken 238487 times.
✓ Branch 3 taken 223561 times.
|
546208 | if (cs->coll_param != &zh_coll_param && r.curr[1]) continue; |
| 4552 |
1/2✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
|
307721 | Unidata_decomp *decomp_rec = get_decomposition(r.curr[0]); |
| 4553 |
2/4✓ Branch 0 taken 307721 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 307721 times.
|
307721 | if (my_coll_add_inherit_rules(rules, &r, decomp_rec, &comp_added)) return 1; |
| 4554 | } | ||
| 4555 | 9359 | return 0; | |
| 4556 | } | ||
| 4557 | |||
| 4558 | /** | ||
| 4559 | Helper function to store weight boundary values. | ||
| 4560 | @param[out] wt_rec Weight boundary for each character group and gap | ||
| 4561 | between groups | ||
| 4562 | @param rec_ind The position from where to store weight boundary | ||
| 4563 | @param old_begin Beginning weight of character group before reorder | ||
| 4564 | @param old_end End weight of character group before reorder | ||
| 4565 | @param new_begin Beginning weight of character group after reorder | ||
| 4566 | @param new_end End weight of character group after reorder | ||
| 4567 | */ | ||
| 4568 | 5735 | static inline void my_set_weight_rec( | |
| 4569 | Reorder_wt_rec (&wt_rec)[2 * UCA_MAX_CHAR_GRP], int rec_ind, | ||
| 4570 | uint16 old_begin, uint16 old_end, uint16 new_begin, uint16 new_end) { | ||
| 4571 | 5735 | wt_rec[rec_ind] = {{old_begin, old_end}, {new_begin, new_end}}; | |
| 4572 | 5735 | } | |
| 4573 | |||
| 4574 | /** | ||
| 4575 | Calculate the reorder parameters for the character groups. | ||
| 4576 | @param cs Character set info | ||
| 4577 | @param[out] rec_ind The position from where to store weight boundary | ||
| 4578 | */ | ||
| 4579 | 2230 | static void my_calc_char_grp_param(const CHARSET_INFO *cs, int &rec_ind) { | |
| 4580 | 2230 | int weight_start = START_WEIGHT_TO_REORDER; | |
| 4581 | 2230 | int grp_ind = 0; | |
| 4582 | 2230 | Reorder_param *param = cs->coll_param->reorder_param; | |
| 4583 |
1/2✓ Branch 0 taken 5735 times.
✗ Branch 1 not taken.
|
5735 | for (; grp_ind < UCA_MAX_CHAR_GRP; ++grp_ind) { |
| 4584 |
2/2✓ Branch 0 taken 2230 times.
✓ Branch 1 taken 3505 times.
|
5735 | if (param->reorder_grp[grp_ind] == CHARGRP_NONE) break; |
| 4585 | 9886 | for (Char_grp_info *info = std::begin(char_grp_infos); | |
| 4586 |
1/2✓ Branch 0 taken 9886 times.
✗ Branch 1 not taken.
|
9886 | info < std::end(char_grp_infos); ++info) { |
| 4587 |
2/2✓ Branch 0 taken 6381 times.
✓ Branch 1 taken 3505 times.
|
9886 | if (param->reorder_grp[grp_ind] != info->group) continue; |
| 4588 | 3505 | my_set_weight_rec( | |
| 4589 | 3505 | param->wt_rec, grp_ind, info->grp_wt_bdy.begin, info->grp_wt_bdy.end, | |
| 4590 | weight_start, | ||
| 4591 | 3505 | weight_start + info->grp_wt_bdy.end - info->grp_wt_bdy.begin); | |
| 4592 | 3505 | weight_start = param->wt_rec[grp_ind].new_wt_bdy.end + 1; | |
| 4593 | 3505 | break; | |
| 4594 | } | ||
| 4595 | } | ||
| 4596 | 2230 | rec_ind = grp_ind; | |
| 4597 | 2230 | } | |
| 4598 | |||
| 4599 | /** | ||
| 4600 | Calculate the reorder parameters for the gap between character groups. | ||
| 4601 | @param cs Character set info | ||
| 4602 | @param rec_ind The position from where to store weight boundary | ||
| 4603 | */ | ||
| 4604 | 2230 | static void my_calc_char_grp_gap_param(CHARSET_INFO *cs, int &rec_ind) { | |
| 4605 | 2230 | Reorder_param *param = cs->coll_param->reorder_param; | |
| 4606 | 2230 | uint16 weight_start = param->wt_rec[rec_ind - 1].new_wt_bdy.end + 1; | |
| 4607 | 2230 | Char_grp_info *last_grp = nullptr; | |
| 4608 | 13380 | for (Char_grp_info *info = std::begin(char_grp_infos); | |
| 4609 |
2/2✓ Branch 0 taken 11150 times.
✓ Branch 1 taken 2230 times.
|
13380 | info < std::end(char_grp_infos); ++info) { |
| 4610 |
1/2✓ Branch 0 taken 23895 times.
✗ Branch 1 not taken.
|
23895 | for (int ind = 0; ind < UCA_MAX_CHAR_GRP; ++ind) { |
| 4611 |
2/2✓ Branch 0 taken 7645 times.
✓ Branch 1 taken 16250 times.
|
23895 | if (param->reorder_grp[ind] == CHARGRP_NONE) break; |
| 4612 |
2/2✓ Branch 0 taken 12745 times.
✓ Branch 1 taken 3505 times.
|
16250 | if (param->reorder_grp[ind] != info->group) continue; |
| 4613 |
2/2✓ Branch 0 taken 794 times.
✓ Branch 1 taken 2711 times.
|
3505 | if (param->max_weight < info->grp_wt_bdy.end) |
| 4614 | 794 | param->max_weight = info->grp_wt_bdy.end; | |
| 4615 | /* | ||
| 4616 | There might be some character groups before the first character | ||
| 4617 | group in our list. | ||
| 4618 | */ | ||
| 4619 |
4/4✓ Branch 0 taken 2230 times.
✓ Branch 1 taken 1275 times.
✓ Branch 2 taken 955 times.
✓ Branch 3 taken 1275 times.
|
3505 | if (!last_grp && info->grp_wt_bdy.begin > START_WEIGHT_TO_REORDER) { |
| 4620 | 955 | my_set_weight_rec(param->wt_rec, rec_ind, START_WEIGHT_TO_REORDER, | |
| 4621 | 955 | info->grp_wt_bdy.begin - 1, weight_start, | |
| 4622 | 955 | weight_start + (info->grp_wt_bdy.begin - 1) - | |
| 4623 | START_WEIGHT_TO_REORDER); | ||
| 4624 | 955 | weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1; | |
| 4625 | 955 | rec_ind++; | |
| 4626 | } | ||
| 4627 | /* Gap between 2 character groups in our list. */ | ||
| 4628 |
3/4✓ Branch 0 taken 1275 times.
✓ Branch 1 taken 2230 times.
✓ Branch 2 taken 1275 times.
✗ Branch 3 not taken.
|
3505 | if (last_grp && last_grp->grp_wt_bdy.end < (info->grp_wt_bdy.begin - 1)) { |
| 4629 | 1275 | my_set_weight_rec(param->wt_rec, rec_ind, last_grp->grp_wt_bdy.end + 1, | |
| 4630 | 1275 | info->grp_wt_bdy.begin - 1, weight_start, | |
| 4631 | 1275 | weight_start + (info->grp_wt_bdy.begin - 1) - | |
| 4632 | 1275 | (last_grp->grp_wt_bdy.end + 1)); | |
| 4633 | 1275 | weight_start = param->wt_rec[rec_ind].new_wt_bdy.end + 1; | |
| 4634 | 1275 | rec_ind++; | |
| 4635 | } | ||
| 4636 | 3505 | last_grp = info; | |
| 4637 | 3505 | break; | |
| 4638 | } | ||
| 4639 | } | ||
| 4640 | 2230 | param->wt_rec_num = rec_ind; | |
| 4641 | 2230 | } | |
| 4642 | |||
| 4643 | /** | ||
| 4644 | Prepare reorder parameters. | ||
| 4645 | @param cs Character set info | ||
| 4646 | */ | ||
| 4647 | 3023 | static int my_prepare_reorder(CHARSET_INFO *cs) { | |
| 4648 | /* | ||
| 4649 | Chinese collation's reordering is done in next_implicit() and | ||
| 4650 | modify_all_zh_pages(). See the comment on zh_reorder_param and | ||
| 4651 | change_zh_implicit(). | ||
| 4652 | */ | ||
| 4653 |
4/4✓ Branch 0 taken 2390 times.
✓ Branch 1 taken 633 times.
✓ Branch 2 taken 160 times.
✓ Branch 3 taken 2230 times.
|
3023 | if (!cs->coll_param->reorder_param || cs->coll_param == &zh_coll_param) |
| 4654 | 793 | return 0; | |
| 4655 | /* | ||
| 4656 | For each group of character, for example, latin characters, | ||
| 4657 | their weights are in a separate range. The default sequence | ||
| 4658 | of these groups is: Latin, Greek, Coptic, Cyrillic, and so | ||
| 4659 | on. Some languages want to change the default sequence. For | ||
| 4660 | example, Croatian wants to put Cyrillic to just behind Latin. | ||
| 4661 | We need to reorder the character groups and change their | ||
| 4662 | weight accordingly. Here we calculate the parameters needed | ||
| 4663 | for weight change. And the change will happen when weight | ||
| 4664 | returns from strnxfrm. | ||
| 4665 | */ | ||
| 4666 | 2230 | int rec_ind = 0; | |
| 4667 | 2230 | my_calc_char_grp_param(cs, rec_ind); | |
| 4668 | 2230 | my_calc_char_grp_gap_param(cs, rec_ind); | |
| 4669 | 2230 | return rec_ind; | |
| 4670 | } | ||
| 4671 | |||
| 4672 | 323 | static void adjust_japanese_weight(CHARSET_INFO *cs, int rec_ind) { | |
| 4673 | /* | ||
| 4674 | Per CLDR 30, Japanese collations need to reorder characters as | ||
| 4675 | [Latin, Kana, Han, others]. So for the original character group list: | ||
| 4676 | [Latin, CharA, Kana, CharB, Han, Others], it should be reordered as | ||
| 4677 | [Latin, Kana, Han, CharA, CharB, Others]. But my_prepare_reorder() | ||
| 4678 | reorders original group to be [Latin, Kana, CharA, CharB, Han, Others]. | ||
| 4679 | This is because Han characters are different from others in that Han | ||
| 4680 | characters' weight is implicit and has two primary weights for each | ||
| 4681 | character. Other characters have only one primary weight for each (base) | ||
| 4682 | character. Han characters always sort bigger. | ||
| 4683 | |||
| 4684 | CLDR defines the collating order for 6355 Japanese Han characters. All | ||
| 4685 | of them are in [U+4E00, U+9FFF]; we give them tailored primary weights | ||
| 4686 | in ja_han_pages. The tailored primary weights are just after Kana, | ||
| 4687 | because these characters are very common. These Han characters' weight | ||
| 4688 | pages will be added to collation's UCA data in copy_ja_han_pages(). | ||
| 4689 | For the other Han characters, we don't change their implicit weights, | ||
| 4690 | which is [FB80 - FB85, 0020, 0002][XXXX, 0000, 0000]. | ||
| 4691 | |||
| 4692 | To make sure CharA and CharB's weight is greater than all Han characters, | ||
| 4693 | we give them weight as [FB86, 0000, 0000][origin weights]. This will be | ||
| 4694 | done in apply_reorder_param(). | ||
| 4695 | |||
| 4696 | Because the values stored in last wt_rec element is calculated for moving | ||
| 4697 | CharA to be after Kana, but we want them to be after all Han character, | ||
| 4698 | we reset the weight boundary here, and will change all these characters' | ||
| 4699 | weight in apply_reorder_param(). | ||
| 4700 | */ | ||
| 4701 | 323 | Reorder_param *param = cs->coll_param->reorder_param; | |
| 4702 | 323 | param->wt_rec[rec_ind - 1].new_wt_bdy.begin = 0; | |
| 4703 | 323 | param->wt_rec[rec_ind - 1].new_wt_bdy.end = 0; | |
| 4704 | 323 | param->wt_rec[rec_ind].old_wt_bdy.begin = param->wt_rec[1].old_wt_bdy.end + 1; | |
| 4705 | 323 | param->wt_rec[rec_ind].old_wt_bdy.end = 0x54A3; | |
| 4706 | 323 | param->wt_rec[rec_ind].new_wt_bdy.begin = 0; | |
| 4707 | 323 | param->wt_rec[rec_ind].new_wt_bdy.end = 0; | |
| 4708 | 323 | param->wt_rec_num++; | |
| 4709 | 323 | param->max_weight = 0x54A3; | |
| 4710 | 323 | } | |
| 4711 | |||
| 4712 | /** | ||
| 4713 | Prepare parametric tailoring, like reorder, etc. | ||
| 4714 | @param cs Character set info | ||
| 4715 | @param rules Collation rule list to add to. | ||
| 4716 | @return false Collation parameters applied successfully. | ||
| 4717 | true Error happened. | ||
| 4718 | */ | ||
| 4719 | 29006 | static bool my_prepare_coll_param(CHARSET_INFO *cs, MY_COLL_RULES *rules) { | |
| 4720 |
4/4✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19647 times.
✓ Branch 2 taken 6336 times.
✓ Branch 3 taken 3023 times.
|
29006 | if (rules->uca->version != UCA_V900 || !cs->coll_param) return false; |
| 4721 | |||
| 4722 | 3023 | int rec_ind = my_prepare_reorder(cs); | |
| 4723 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3023 times.
|
3023 | if (add_normalization_rules(cs, rules)) return true; |
| 4724 | |||
| 4725 |
2/2✓ Branch 0 taken 323 times.
✓ Branch 1 taken 2700 times.
|
3023 | if (cs->coll_param == &ja_coll_param) adjust_japanese_weight(cs, rec_ind); |
| 4726 | /* Might add other parametric tailoring rules later. */ | ||
| 4727 | 3023 | return false; | |
| 4728 | } | ||
| 4729 | |||
| 4730 | /* | ||
| 4731 | This function copies an UCS2 collation from | ||
| 4732 | the default Unicode Collation Algorithm (UCA) | ||
| 4733 | weights applying tailorings, i.e. a set of | ||
| 4734 | alternative weights for some characters. | ||
| 4735 | |||
| 4736 | The default UCA weights are stored in uca_weight/uca_length. | ||
| 4737 | They consist of 256 pages, 256 characters each. | ||
| 4738 | |||
| 4739 | If a page is not overwritten by tailoring rules, | ||
| 4740 | it is copied as is from UCA. | ||
| 4741 | |||
| 4742 | If a page contains some overwritten characters, it is | ||
| 4743 | allocated. Untouched characters are copied from the | ||
| 4744 | default weights. | ||
| 4745 | */ | ||
| 4746 | |||
| 4747 | 136555 | static bool create_tailoring(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) { | |
| 4748 |
2/2✓ Branch 0 taken 107546 times.
✓ Branch 1 taken 29009 times.
|
136555 | if (!cs->tailoring) |
| 4749 | 107546 | return false; /* Ok to add a collation without tailoring */ | |
| 4750 | |||
| 4751 | MY_COLL_RULES rules; | ||
| 4752 | 29009 | MY_UCA_INFO new_uca, *src_uca = nullptr; | |
| 4753 | 29009 | int rc = 0; | |
| 4754 | MY_UCA_INFO *src, *dst; | ||
| 4755 | size_t npages; | ||
| 4756 | bool lengths_are_temporary; | ||
| 4757 | |||
| 4758 | 29009 | loader->errcode = 0; | |
| 4759 | 29009 | *loader->errarg = '\0'; | |
| 4760 | |||
| 4761 | 29009 | memset(&rules, 0, sizeof(rules)); | |
| 4762 | 29009 | rules.loader = loader; | |
| 4763 |
1/2✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
|
29009 | rules.uca = cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */ |
| 4764 | 29009 | memset(&new_uca, 0, sizeof(new_uca)); | |
| 4765 | |||
| 4766 | /* Parse ICU Collation Customization expression */ | ||
| 4767 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 29006 times.
|
29009 | if ((rc = my_coll_rule_parse(&rules, cs->tailoring, |
| 4768 |
1/2✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
|
29009 | cs->tailoring + strlen(cs->tailoring), |
| 4769 | cs->m_coll_name))) | ||
| 4770 | 3 | goto ex; | |
| 4771 | |||
| 4772 |
2/4✓ Branch 0 taken 29006 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 29006 times.
|
29006 | if ((rc = my_coll_check_rule_and_inherit(cs, &rules))) goto ex; |
| 4773 | |||
| 4774 |
2/4✓ Branch 0 taken 29006 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 29006 times.
|
29006 | if ((rc = my_prepare_coll_param(cs, &rules))) goto ex; |
| 4775 | |||
| 4776 |
2/2✓ Branch 0 taken 972 times.
✓ Branch 1 taken 28034 times.
|
29006 | if (rules.uca->version == UCA_V520) /* Unicode-5.2.0 requested */ |
| 4777 | { | ||
| 4778 | 972 | src_uca = &my_uca_v520; | |
| 4779 | 972 | cs->caseinfo = &my_unicase_unicode520; | |
| 4780 |
2/2✓ Branch 0 taken 18675 times.
✓ Branch 1 taken 9359 times.
|
28034 | } else if (rules.uca->version == UCA_V400) /* Unicode-4.0.0 requested */ |
| 4781 | { | ||
| 4782 | 18675 | src_uca = &my_uca_v400; | |
| 4783 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18675 times.
|
18675 | if (!cs->caseinfo) cs->caseinfo = &my_unicase_default; |
| 4784 | } else /* No Unicode version specified */ | ||
| 4785 | { | ||
| 4786 |
1/2✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
|
9359 | src_uca = cs->uca ? cs->uca : &my_uca_v400; |
| 4787 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9359 times.
|
9359 | if (!cs->caseinfo) cs->caseinfo = &my_unicase_default; |
| 4788 | } | ||
| 4789 | |||
| 4790 | /* | ||
| 4791 | For UCA 9.0.0, we don't have a length page, but we still create one | ||
| 4792 | temporarily so that we can keep track of how much memory we need to | ||
| 4793 | allocate for weights. | ||
| 4794 | */ | ||
| 4795 | 29006 | src = src_uca; | |
| 4796 | 29006 | dst = &new_uca; | |
| 4797 | |||
| 4798 | 29006 | dst->extra_ce_pri_base = cs->uca->extra_ce_pri_base; | |
| 4799 | 29006 | dst->extra_ce_sec_base = cs->uca->extra_ce_sec_base; | |
| 4800 | 29006 | dst->extra_ce_ter_base = cs->uca->extra_ce_ter_base; | |
| 4801 |
4/4✓ Branch 0 taken 3023 times.
✓ Branch 1 taken 25983 times.
✓ Branch 2 taken 160 times.
✓ Branch 3 taken 2863 times.
|
29006 | if (cs->coll_param && cs->coll_param == &zh_coll_param) { |
| 4802 | 160 | dst->extra_ce_pri_base = ZH_EXTRA_CE_PRI; | |
| 4803 | } | ||
| 4804 | |||
| 4805 | 29006 | npages = (src->maxchar + 1) / 256; | |
| 4806 |
2/2✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19647 times.
|
29006 | if (rules.uca->version == UCA_V900) { |
| 4807 |
2/4✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 9359 times.
|
9359 | if (!(src->lengths = (uchar *)(loader->mem_malloc)(npages))) goto ex; |
| 4808 |
1/2✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
|
9359 | synthesize_lengths_900(src->lengths, src->weights, npages); |
| 4809 | } | ||
| 4810 | |||
| 4811 | 29006 | lengths_are_temporary = (rules.uca->version == UCA_V900); | |
| 4812 |
3/4✓ Branch 0 taken 29006 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 29004 times.
|
29006 | if ((rc = init_weight_level(cs, loader, &rules, 0, dst, src, |
| 4813 | lengths_are_temporary))) | ||
| 4814 | 2 | goto ex; | |
| 4815 | |||
| 4816 |
2/2✓ Branch 0 taken 9359 times.
✓ Branch 1 taken 19645 times.
|
29004 | if (lengths_are_temporary) { |
| 4817 |
1/2✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
|
9359 | (loader->mem_free)(src->lengths); |
| 4818 |
1/2✓ Branch 0 taken 9359 times.
✗ Branch 1 not taken.
|
9359 | (loader->mem_free)(dst->lengths); |
| 4819 | 9359 | src->lengths = nullptr; | |
| 4820 | 9359 | dst->lengths = nullptr; | |
| 4821 | } | ||
| 4822 | |||
| 4823 | 29004 | new_uca.version = src_uca->version; | |
| 4824 |
2/4✓ Branch 0 taken 29004 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 29004 times.
|
29004 | if (!(cs->uca = (MY_UCA_INFO *)(loader->once_alloc)(sizeof(MY_UCA_INFO)))) { |
| 4825 | ✗ | rc = 1; | |
| 4826 | ✗ | goto ex; | |
| 4827 | } | ||
| 4828 | 29004 | memset(cs->uca, 0, sizeof(MY_UCA_INFO)); | |
| 4829 | 29004 | cs->uca[0] = new_uca; | |
| 4830 | |||
| 4831 | 29009 | ex: | |
| 4832 |
1/2✓ Branch 0 taken 29009 times.
✗ Branch 1 not taken.
|
29009 | (loader->mem_free)(rules.rule); |
| 4833 |
3/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 29004 times.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
|
29009 | if (rc != 0 && loader->errcode) { |
| 4834 |
3/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
|
5 | if (new_uca.contraction_nodes) delete new_uca.contraction_nodes; |
| 4835 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | loader->reporter(ERROR_LEVEL, loader->errcode, loader->errarg); |
| 4836 | } | ||
| 4837 | 29009 | return rc; | |
| 4838 | } | ||
| 4839 | |||
| 4840 | 19367193 | static void my_coll_uninit_uca(CHARSET_INFO *cs) { | |
| 4841 |
4/4✓ Branch 0 taken 7215187 times.
✓ Branch 1 taken 12152006 times.
✓ Branch 2 taken 8842 times.
✓ Branch 3 taken 7206345 times.
|
19367193 | if (cs->uca && cs->uca->contraction_nodes) { |
| 4842 |
1/2✓ Branch 0 taken 8842 times.
✗ Branch 1 not taken.
|
8842 | delete cs->uca->contraction_nodes; |
| 4843 | 8842 | cs->uca->contraction_nodes = nullptr; | |
| 4844 | 8842 | cs->state &= ~MY_CS_READY; | |
| 4845 | } | ||
| 4846 | 19367193 | } | |
| 4847 | /* | ||
| 4848 | Universal CHARSET_INFO compatible wrappers | ||
| 4849 | for the above internal functions. | ||
| 4850 | Should work for any character set. | ||
| 4851 | */ | ||
| 4852 | |||
| 4853 | extern "C" { | ||
| 4854 | 136555 | static bool my_coll_init_uca(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) { | |
| 4855 | 136555 | cs->pad_char = ' '; | |
| 4856 | 136555 | cs->ctype = my_charset_utf8_unicode_ci.ctype; | |
| 4857 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 136516 times.
|
136555 | if (!cs->caseinfo) cs->caseinfo = &my_unicase_default; |
| 4858 |
2/2✓ Branch 0 taken 18680 times.
✓ Branch 1 taken 117875 times.
|
136555 | if (!cs->uca) cs->uca = &my_uca_v400; |
| 4859 | 136555 | return create_tailoring(cs, loader); | |
| 4860 | } | ||
| 4861 | |||
| 4862 | 80 | static int my_strnncoll_any_uca(const CHARSET_INFO *cs, const uchar *s, | |
| 4863 | size_t slen, const uchar *t, size_t tlen, | ||
| 4864 | bool t_is_prefix) { | ||
| 4865 |
2/2✓ Branch 0 taken 54 times.
✓ Branch 1 taken 26 times.
|
80 | if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { |
| 4866 |
1/2✓ Branch 0 taken 54 times.
✗ Branch 1 not taken.
|
54 | return my_strnncoll_uca<uca_scanner_any<Mb_wc_utf8mb4>, 1>( |
| 4867 | 54 | cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); | |
| 4868 | } | ||
| 4869 | |||
| 4870 | 26 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 4871 |
1/2✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
|
26 | return my_strnncoll_uca<uca_scanner_any<decltype(mb_wc)>, 1>( |
| 4872 | 26 | cs, mb_wc, s, slen, t, tlen, t_is_prefix); | |
| 4873 | } | ||
| 4874 | |||
| 4875 | 264540 | static int my_strnncollsp_any_uca(const CHARSET_INFO *cs, const uchar *s, | |
| 4876 | size_t slen, const uchar *t, size_t tlen) { | ||
| 4877 |
2/2✓ Branch 0 taken 145742 times.
✓ Branch 1 taken 118798 times.
|
264540 | if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { |
| 4878 |
1/2✓ Branch 0 taken 145742 times.
✗ Branch 1 not taken.
|
145742 | return my_strnncollsp_uca(cs, Mb_wc_utf8mb4(), s, slen, t, tlen); |
| 4879 | } | ||
| 4880 | |||
| 4881 | 118798 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 4882 |
1/2✓ Branch 0 taken 118811 times.
✗ Branch 1 not taken.
|
118796 | return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen); |
| 4883 | } | ||
| 4884 | |||
| 4885 | 2324 | static void my_hash_sort_any_uca(const CHARSET_INFO *cs, const uchar *s, | |
| 4886 | size_t slen, uint64 *n1, uint64 *n2) { | ||
| 4887 |
2/2✓ Branch 0 taken 648 times.
✓ Branch 1 taken 1676 times.
|
2324 | if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { |
| 4888 | 648 | my_hash_sort_uca(cs, Mb_wc_utf8mb4(), s, slen, n1, n2); | |
| 4889 | } else { | ||
| 4890 | 1676 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 4891 |
1/2✓ Branch 0 taken 1676 times.
✗ Branch 1 not taken.
|
1676 | my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2); |
| 4892 | } | ||
| 4893 | 2324 | } | |
| 4894 | |||
| 4895 | 248749863 | static size_t my_strnxfrm_any_uca(const CHARSET_INFO *cs, uchar *dst, | |
| 4896 | size_t dstlen, uint num_codepoints, | ||
| 4897 | const uchar *src, size_t srclen, uint flags) { | ||
| 4898 |
2/2✓ Branch 0 taken 80229934 times.
✓ Branch 1 taken 168519929 times.
|
248749863 | if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { |
| 4899 |
1/2✓ Branch 0 taken 80229934 times.
✗ Branch 1 not taken.
|
80229934 | return my_strnxfrm_uca(cs, Mb_wc_utf8mb4(), dst, dstlen, num_codepoints, |
| 4900 | 80229934 | src, srclen, flags); | |
| 4901 | } | ||
| 4902 | |||
| 4903 | 168519929 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 4904 |
1/2✓ Branch 0 taken 168519929 times.
✗ Branch 1 not taken.
|
168519929 | return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen, |
| 4905 | 168519929 | flags); | |
| 4906 | } | ||
| 4907 | |||
| 4908 | 5211575177 | static int my_strnncoll_uca_900(const CHARSET_INFO *cs, const uchar *s, | |
| 4909 | size_t slen, const uchar *t, size_t tlen, | ||
| 4910 | bool t_is_prefix) { | ||
| 4911 |
1/2✓ Branch 0 taken 5211600671 times.
✗ Branch 1 not taken.
|
5211575177 | if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { |
| 4912 |
3/5✓ Branch 0 taken 5211594076 times.
✓ Branch 1 taken 4584 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2944 times.
✗ Branch 4 not taken.
|
5211600671 | switch (cs->levels_for_compare) { |
| 4913 | 5211594076 | case 1: | |
| 4914 |
1/2✓ Branch 0 taken 5211879234 times.
✗ Branch 1 not taken.
|
5211594076 | return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 1>, 1>( |
| 4915 | 5211879234 | cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); | |
| 4916 | 4584 | case 2: | |
| 4917 |
1/2✓ Branch 0 taken 4584 times.
✗ Branch 1 not taken.
|
4584 | return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 2>, 2>( |
| 4918 | 4584 | cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); | |
| 4919 | ✗ | default: | |
| 4920 | ✗ | assert(false); | |
| 4921 | 2944 | case 3: | |
| 4922 |
1/2✓ Branch 0 taken 2944 times.
✗ Branch 1 not taken.
|
2944 | return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 3>, 3>( |
| 4923 | 2944 | cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); | |
| 4924 | 745 | case 4: | |
| 4925 |
1/2✓ Branch 0 taken 62 times.
✗ Branch 1 not taken.
|
745 | return my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 4>, 4>( |
| 4926 | 62 | cs, Mb_wc_utf8mb4(), s, slen, t, tlen, t_is_prefix); | |
| 4927 | } | ||
| 4928 | } | ||
| 4929 | |||
| 4930 | ✗ | Mb_wc_through_function_pointer mb_wc(cs); | |
| 4931 | ✗ | switch (cs->levels_for_compare) { | |
| 4932 | ✗ | case 1: | |
| 4933 | ✗ | return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 1>, 1>( | |
| 4934 | ✗ | cs, mb_wc, s, slen, t, tlen, t_is_prefix); | |
| 4935 | ✗ | case 2: | |
| 4936 | ✗ | return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 2>, 2>( | |
| 4937 | ✗ | cs, mb_wc, s, slen, t, tlen, t_is_prefix); | |
| 4938 | ✗ | default: | |
| 4939 | ✗ | assert(false); | |
| 4940 | ✗ | case 3: | |
| 4941 | ✗ | return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 3>, 3>( | |
| 4942 | ✗ | cs, mb_wc, s, slen, t, tlen, t_is_prefix); | |
| 4943 | ✗ | case 4: | |
| 4944 | ✗ | return my_strnncoll_uca<uca_scanner_900<decltype(mb_wc), 4>, 4>( | |
| 4945 | ✗ | cs, mb_wc, s, slen, t, tlen, t_is_prefix); | |
| 4946 | } | ||
| 4947 | } | ||
| 4948 | |||
| 4949 | 812773129 | static int my_strnncollsp_uca_900(const CHARSET_INFO *cs, const uchar *s, | |
| 4950 | size_t slen, const uchar *t, size_t tlen) { | ||
| 4951 | // We are a NO PAD collation, so this is identical to strnncoll. | ||
| 4952 | 812773129 | return my_strnncoll_uca_900(cs, s, slen, t, tlen, false); | |
| 4953 | } | ||
| 4954 | |||
| 4955 | } // extern "C" | ||
| 4956 | |||
| 4957 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 4958 | 303824342 | static void my_hash_sort_uca_900_tmpl(const CHARSET_INFO *cs, const Mb_wc mb_wc, | |
| 4959 | const uchar *s, size_t slen, uint64 *n1) { | ||
| 4960 | 303824342 | uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE> scanner(mb_wc, cs, s, slen); | |
| 4961 | |||
| 4962 | /* | ||
| 4963 | A variation of the FNV-1a hash. The differences between this and | ||
| 4964 | standard FNV-1a as described in literature are: | ||
| 4965 | |||
| 4966 | - We work naturally on 16-bit weights, so we XOR in the entire weight | ||
| 4967 | instead of hashing byte-by-byte. (This is effectively a speed/quality | ||
| 4968 | tradeoff, as it will reduce avalanche.) | ||
| 4969 | - We use the n1 seed by XOR-ing it onto the offset basis; FNV-1a as | ||
| 4970 | typically described does not use a seed. This should be safe, since | ||
| 4971 | there's nothing magical about the offset basis; it's just the FNV-1a | ||
| 4972 | hash of some human-readable text. | ||
| 4973 | |||
| 4974 | This is nowhere near a perfect hash function; it has suboptimal avalanche | ||
| 4975 | characteristics, and it is not multicollision resistant. In particular, | ||
| 4976 | it fails many SMHasher tests, mostly for bias (collision tests are fine). | ||
| 4977 | However, it is of much better quality than the home-grown hash used | ||
| 4978 | for other collations (which fails _all_ SMHasher tests), while being | ||
| 4979 | much faster. | ||
| 4980 | |||
| 4981 | We ignore the n2 seed entirely, since we don't need it. The caller is | ||
| 4982 | responsible for doing hash folding at the end; we can't do that. | ||
| 4983 | |||
| 4984 | See http://isthe.com/chongo/tech/comp/fnv/#FNV-param for constants. | ||
| 4985 | */ | ||
| 4986 | |||
| 4987 | 303824372 | uint64 h = *n1; | |
| 4988 | 303824372 | h ^= 14695981039346656037ULL; | |
| 4989 | |||
| 4990 |
2/2✓ Branch 0 taken 151912094 times.
✓ Branch 1 taken 92 times.
|
303824372 | scanner.for_each_weight( |
| 4991 | 2116889324 | [&](int s_res, bool) -> bool { | |
| 4992 | 2116889324 | h ^= s_res; | |
| 4993 | 2116889324 | h *= 1099511628211ULL; | |
| 4994 | 2116889324 | return true; | |
| 4995 | }, | ||
| 4996 | 473260726 | [](int) { return true; }); | |
| 4997 | |||
| 4998 | 303824542 | *n1 = h; | |
| 4999 | } | ||
| 5000 | |||
| 5001 | extern "C" { | ||
| 5002 | |||
| 5003 | 151911974 | static void my_hash_sort_uca_900(const CHARSET_INFO *cs, const uchar *s, | |
| 5004 | size_t slen, uint64 *n1, uint64 *) { | ||
| 5005 |
1/2✓ Branch 0 taken 151912101 times.
✗ Branch 1 not taken.
|
151911974 | if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { |
| 5006 |
3/5✓ Branch 0 taken 151912143 times.
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 141 times.
✗ Branch 4 not taken.
|
151912101 | switch (cs->levels_for_compare) { |
| 5007 | 151912143 | case 1: | |
| 5008 |
1/2✓ Branch 0 taken 151912328 times.
✗ Branch 1 not taken.
|
151912143 | return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 1>(cs, Mb_wc_utf8mb4(), |
| 5009 | 151912328 | s, slen, n1); | |
| 5010 | 14 | case 2: | |
| 5011 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
14 | return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 2>(cs, Mb_wc_utf8mb4(), |
| 5012 | 14 | s, slen, n1); | |
| 5013 | ✗ | default: | |
| 5014 | ✗ | assert(false); | |
| 5015 | 141 | case 3: | |
| 5016 |
1/2✓ Branch 0 taken 141 times.
✗ Branch 1 not taken.
|
141 | return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 3>(cs, Mb_wc_utf8mb4(), |
| 5017 | 141 | s, slen, n1); | |
| 5018 | ✗ | case 4: | |
| 5019 | ✗ | return my_hash_sort_uca_900_tmpl<Mb_wc_utf8mb4, 4>(cs, Mb_wc_utf8mb4(), | |
| 5020 | 38 | s, slen, n1); | |
| 5021 | } | ||
| 5022 | } | ||
| 5023 | |||
| 5024 | ✗ | Mb_wc_through_function_pointer mb_wc(cs); | |
| 5025 | ✗ | switch (cs->levels_for_compare) { | |
| 5026 | ✗ | case 1: | |
| 5027 | ✗ | return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 1>(cs, mb_wc, s, slen, | |
| 5028 | ✗ | n1); | |
| 5029 | ✗ | case 2: | |
| 5030 | ✗ | return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 2>(cs, mb_wc, s, slen, | |
| 5031 | ✗ | n1); | |
| 5032 | ✗ | default: | |
| 5033 | ✗ | assert(false); | |
| 5034 | ✗ | case 3: | |
| 5035 | ✗ | return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 3>(cs, mb_wc, s, slen, | |
| 5036 | ✗ | n1); | |
| 5037 | ✗ | case 4: | |
| 5038 | ✗ | return my_hash_sort_uca_900_tmpl<decltype(mb_wc), 4>(cs, mb_wc, s, slen, | |
| 5039 | ✗ | n1); | |
| 5040 | } | ||
| 5041 | } | ||
| 5042 | |||
| 5043 | } // extern "C" | ||
| 5044 | |||
| 5045 | /* | ||
| 5046 | Check if a constant can be propagated | ||
| 5047 | |||
| 5048 | Currently we don't check the constant itself, and decide not to propagate | ||
| 5049 | a constant just if the collation itself allows expansions or contractions. | ||
| 5050 | */ | ||
| 5051 | 189343 | bool my_propagate_uca_900(const CHARSET_INFO *cs, | |
| 5052 | const uchar *str [[maybe_unused]], | ||
| 5053 | size_t length [[maybe_unused]]) { | ||
| 5054 | 189343 | return !my_uca_have_contractions(cs->uca); | |
| 5055 | } | ||
| 5056 | |||
| 5057 | template <class Mb_wc, int LEVELS_FOR_COMPARE> | ||
| 5058 | 494915754 | static size_t my_strnxfrm_uca_900_tmpl(const CHARSET_INFO *cs, | |
| 5059 | const Mb_wc mb_wc, uchar *dst, | ||
| 5060 | size_t dstlen, const uchar *src, | ||
| 5061 | size_t srclen, uint flags) { | ||
| 5062 | 494915754 | uchar *d0 = dst; | |
| 5063 | 494915754 | uchar *dst_end = dst + dstlen; | |
| 5064 | 494915754 | uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE> scanner(mb_wc, cs, src, srclen); | |
| 5065 | |||
| 5066 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 247459484 times.
|
494916080 | assert((dstlen % 2) == 0); |
| 5067 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 247459484 times.
|
494918968 | if ((dstlen % 2) == 1) { |
| 5068 | // Emergency workaround for optimized mode. | ||
| 5069 | ✗ | --dst_end; | |
| 5070 | } | ||
| 5071 | |||
| 5072 |
2/2✓ Branch 0 taken 247458029 times.
✓ Branch 1 taken 1455 times.
|
494918968 | if (dst != dst_end) { |
| 5073 |
2/2✓ Branch 0 taken 50244123 times.
✓ Branch 1 taken 197213906 times.
|
989826970 | scanner.for_each_weight( |
| 5074 | 6200149933 | [&dst, dst_end](int s_res, | |
| 5075 | bool is_level_separator [[maybe_unused]]) -> bool { | ||
| 5076 |
4/16✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 20287995 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 624007531 times.
✗ Branch 12 not taken.
✓ Branch 13 taken 13394654 times.
✗ Branch 14 not taken.
✓ Branch 15 taken 1409026496 times.
|
2066716676 | assert(is_level_separator == (s_res == 0)); |
| 5077 |
1/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1409026496 times.
|
1409026496 | if (LEVELS_FOR_COMPARE == 1) assert(!is_level_separator); |
| 5078 | |||
| 5079 | 2066716676 | dst = store16be(dst, s_res); | |
| 5080 | 2066716581 | return (dst < dst_end); | |
| 5081 | }, | ||
| 5082 | 313245093 | [&dst, dst_end](int num_weights) { | |
| 5083 | 313245093 | return (dst < dst_end - num_weights * 2); | |
| 5084 | }); | ||
| 5085 | } | ||
| 5086 | |||
| 5087 |
2/2✓ Branch 0 taken 79838 times.
✓ Branch 1 taken 247377073 times.
|
494913822 | if (flags & MY_STRXFRM_PAD_TO_MAXLEN) { |
| 5088 | 159676 | memset(dst, 0, dst_end - dst); | |
| 5089 | 159676 | dst = dst_end; | |
| 5090 | } | ||
| 5091 | |||
| 5092 | 494913822 | return dst - d0; | |
| 5093 | } | ||
| 5094 | |||
| 5095 | extern "C" { | ||
| 5096 | |||
| 5097 | 247457290 | static size_t my_strnxfrm_uca_900(const CHARSET_INFO *cs, uchar *dst, | |
| 5098 | size_t dstlen, | ||
| 5099 | uint num_codepoints [[maybe_unused]], | ||
| 5100 | const uchar *src, size_t srclen, uint flags) { | ||
| 5101 |
1/2✓ Branch 0 taken 247457530 times.
✗ Branch 1 not taken.
|
247457290 | if (cs->cset->mb_wc == my_mb_wc_utf8mb4_thunk) { |
| 5102 |
4/5✓ Branch 0 taken 137154357 times.
✓ Branch 1 taken 3344873 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 103615568 times.
✓ Branch 4 taken 3342732 times.
|
247457530 | switch (cs->levels_for_compare) { |
| 5103 | 137154357 | case 1: | |
| 5104 | 137154357 | return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 1>( | |
| 5105 | 137153061 | cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); | |
| 5106 | 3344873 | case 2: | |
| 5107 | 3344873 | return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 2>( | |
| 5108 | 3344873 | cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); | |
| 5109 | ✗ | default: | |
| 5110 | ✗ | assert(false); | |
| 5111 | 103615568 | case 3: | |
| 5112 | 103615568 | return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 3>( | |
| 5113 | 103615568 | cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); | |
| 5114 | 3342732 | case 4: | |
| 5115 | 3342732 | return my_strnxfrm_uca_900_tmpl<Mb_wc_utf8mb4, 4>( | |
| 5116 | 3342738 | cs, Mb_wc_utf8mb4(), dst, dstlen, src, srclen, flags); | |
| 5117 | } | ||
| 5118 | } else { | ||
| 5119 | ✗ | Mb_wc_through_function_pointer mb_wc(cs); | |
| 5120 | ✗ | switch (cs->levels_for_compare) { | |
| 5121 | ✗ | case 1: | |
| 5122 | ✗ | return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 1>( | |
| 5123 | ✗ | cs, mb_wc, dst, dstlen, src, srclen, flags); | |
| 5124 | ✗ | case 2: | |
| 5125 | ✗ | return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 2>( | |
| 5126 | ✗ | cs, mb_wc, dst, dstlen, src, srclen, flags); | |
| 5127 | ✗ | default: | |
| 5128 | ✗ | assert(false); | |
| 5129 | ✗ | case 3: | |
| 5130 | ✗ | return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 3>( | |
| 5131 | ✗ | cs, mb_wc, dst, dstlen, src, srclen, flags); | |
| 5132 | ✗ | case 4: | |
| 5133 | ✗ | return my_strnxfrm_uca_900_tmpl<decltype(mb_wc), 4>( | |
| 5134 | ✗ | cs, mb_wc, dst, dstlen, src, srclen, flags); | |
| 5135 | } | ||
| 5136 | } | ||
| 5137 | } | ||
| 5138 | |||
| 5139 | 1303790 | static size_t my_strnxfrmlen_uca_900(const CHARSET_INFO *cs, size_t len) { | |
| 5140 | /* | ||
| 5141 | The character with the most weights is U+FDFA ARABIC LIGATURE SALLALLAHOU | ||
| 5142 | ALAYHE WASALLAM, which we truncate to eight weights. This is the most we | ||
| 5143 | can get in regular DUCET. | ||
| 5144 | |||
| 5145 | In addition, collations with reorderings can add an extra weight per weight, | ||
| 5146 | which currently only happens on the primary level. We simulate this by | ||
| 5147 | simply adding an extra level. | ||
| 5148 | |||
| 5149 | One could conceivably have tailorings yielding expansions having more than | ||
| 5150 | this, but we don't currently, and mostly, tailorings are about contractions | ||
| 5151 | and adding single weights anyway. | ||
| 5152 | |||
| 5153 | We also need to add room for one level separator between each level. | ||
| 5154 | */ | ||
| 5155 | // We really ought to have len % 4 == 0, but not all calling code conforms. | ||
| 5156 | 1303790 | const size_t num_codepoints = (len + 3) / 4; | |
| 5157 | 1303790 | const size_t max_num_weights_per_level = num_codepoints * 8; | |
| 5158 | 1303790 | size_t max_num_weights = max_num_weights_per_level * cs->levels_for_compare; | |
| 5159 |
4/4✓ Branch 0 taken 798 times.
✓ Branch 1 taken 1302992 times.
✓ Branch 2 taken 638 times.
✓ Branch 3 taken 160 times.
|
1303790 | if (cs->coll_param && cs->coll_param->reorder_param) { |
| 5160 | 638 | max_num_weights += max_num_weights_per_level; | |
| 5161 | } | ||
| 5162 | 1303790 | return (max_num_weights + (cs->levels_for_compare - 1)) * sizeof(uint16_t); | |
| 5163 | } | ||
| 5164 | |||
| 5165 | } // extern "C" | ||
| 5166 | |||
| 5167 | /* | ||
| 5168 | UCS2 optimized CHARSET_INFO compatible wrappers. | ||
| 5169 | */ | ||
| 5170 | extern "C" { | ||
| 5171 | 12 | static int my_strnncoll_ucs2_uca(const CHARSET_INFO *cs, const uchar *s, | |
| 5172 | size_t slen, const uchar *t, size_t tlen, | ||
| 5173 | bool t_is_prefix) { | ||
| 5174 | 12 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 5175 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | return my_strnncoll_uca<uca_scanner_any<decltype(mb_wc)>, 1>( |
| 5176 | 24 | cs, mb_wc, s, slen, t, tlen, t_is_prefix); | |
| 5177 | } | ||
| 5178 | |||
| 5179 | 12139 | static int my_strnncollsp_ucs2_uca(const CHARSET_INFO *cs, const uchar *s, | |
| 5180 | size_t slen, const uchar *t, size_t tlen) { | ||
| 5181 | 12139 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 5182 |
1/2✓ Branch 0 taken 12139 times.
✗ Branch 1 not taken.
|
24278 | return my_strnncollsp_uca(cs, mb_wc, s, slen, t, tlen); |
| 5183 | } | ||
| 5184 | |||
| 5185 | 460 | static void my_hash_sort_ucs2_uca(const CHARSET_INFO *cs, const uchar *s, | |
| 5186 | size_t slen, uint64 *n1, uint64 *n2) { | ||
| 5187 | 460 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 5188 |
1/2✓ Branch 0 taken 460 times.
✗ Branch 1 not taken.
|
460 | my_hash_sort_uca(cs, mb_wc, s, slen, n1, n2); |
| 5189 | 460 | } | |
| 5190 | |||
| 5191 | 4869686 | static size_t my_strnxfrm_ucs2_uca(const CHARSET_INFO *cs, uchar *dst, | |
| 5192 | size_t dstlen, uint num_codepoints, | ||
| 5193 | const uchar *src, size_t srclen, | ||
| 5194 | uint flags) { | ||
| 5195 | 4869686 | Mb_wc_through_function_pointer mb_wc(cs); | |
| 5196 |
1/2✓ Branch 0 taken 4869686 times.
✗ Branch 1 not taken.
|
4869686 | return my_strnxfrm_uca(cs, mb_wc, dst, dstlen, num_codepoints, src, srclen, |
| 5197 | 9739372 | flags); | |
| 5198 | } | ||
| 5199 | } // extern "C" | ||
| 5200 | |||
| 5201 | MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = { | ||
| 5202 | my_coll_init_uca, /* init */ | ||
| 5203 | my_coll_uninit_uca, | ||
| 5204 | my_strnncoll_ucs2_uca, | ||
| 5205 | my_strnncollsp_ucs2_uca, | ||
| 5206 | my_strnxfrm_ucs2_uca, | ||
| 5207 | my_strnxfrmlen_simple, | ||
| 5208 | my_like_range_generic, | ||
| 5209 | my_wildcmp_uca, | ||
| 5210 | nullptr, | ||
| 5211 | my_instr_mb, | ||
| 5212 | my_hash_sort_ucs2_uca, | ||
| 5213 | my_propagate_complex}; | ||
| 5214 | |||
| 5215 | CHARSET_INFO my_charset_ucs2_unicode_ci = { | ||
| 5216 | 128, | ||
| 5217 | 0, | ||
| 5218 | 0, /* number */ | ||
| 5219 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, | ||
| 5220 | "ucs2", /* cs name */ | ||
| 5221 | "ucs2_unicode_ci", /* m_coll_name */ | ||
| 5222 | "UCS-2 Unicode", /* comment */ | ||
| 5223 | "", /* tailoring */ | ||
| 5224 | nullptr, /* coll_param */ | ||
| 5225 | nullptr, /* ctype */ | ||
| 5226 | nullptr, /* to_lower */ | ||
| 5227 | nullptr, /* to_upper */ | ||
| 5228 | nullptr, /* sort_order */ | ||
| 5229 | nullptr, /* uca */ | ||
| 5230 | nullptr, /* tab_to_uni */ | ||
| 5231 | nullptr, /* tab_from_uni */ | ||
| 5232 | &my_unicase_default, /* caseinfo */ | ||
| 5233 | nullptr, /* state_map */ | ||
| 5234 | nullptr, /* ident_map */ | ||
| 5235 | 8, /* strxfrm_multiply */ | ||
| 5236 | 1, /* caseup_multiply */ | ||
| 5237 | 1, /* casedn_multiply */ | ||
| 5238 | 2, /* mbminlen */ | ||
| 5239 | 2, /* mbmaxlen */ | ||
| 5240 | 1, /* mbmaxlenlen */ | ||
| 5241 | 9, /* min_sort_char */ | ||
| 5242 | 0xFFFF, /* max_sort_char */ | ||
| 5243 | ' ', /* pad char */ | ||
| 5244 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5245 | 1, /* levels_for_compare */ | ||
| 5246 | &my_charset_ucs2_handler, | ||
| 5247 | &my_collation_ucs2_uca_handler, | ||
| 5248 | PAD_SPACE}; | ||
| 5249 | |||
| 5250 | CHARSET_INFO my_charset_ucs2_icelandic_uca_ci = { | ||
| 5251 | 129, | ||
| 5252 | 0, | ||
| 5253 | 0, /* number */ | ||
| 5254 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, | ||
| 5255 | "ucs2", /* cs name */ | ||
| 5256 | "ucs2_icelandic_ci", /* m_coll_name */ | ||
| 5257 | "UCS-2 Unicode", /* comment */ | ||
| 5258 | icelandic, /* tailoring */ | ||
| 5259 | nullptr, /* coll_param */ | ||
| 5260 | nullptr, /* ctype */ | ||
| 5261 | nullptr, /* to_lower */ | ||
| 5262 | nullptr, /* to_upper */ | ||
| 5263 | nullptr, /* sort_order */ | ||
| 5264 | nullptr, /* uca */ | ||
| 5265 | nullptr, /* tab_to_uni */ | ||
| 5266 | nullptr, /* tab_from_uni */ | ||
| 5267 | &my_unicase_default, /* caseinfo */ | ||
| 5268 | nullptr, /* state_map */ | ||
| 5269 | nullptr, /* ident_map */ | ||
| 5270 | 8, /* strxfrm_multiply */ | ||
| 5271 | 1, /* caseup_multiply */ | ||
| 5272 | 1, /* casedn_multiply */ | ||
| 5273 | 2, /* mbminlen */ | ||
| 5274 | 2, /* mbmaxlen */ | ||
| 5275 | 1, /* mbmaxlenlen */ | ||
| 5276 | 9, /* min_sort_char */ | ||
| 5277 | 0xFFFF, /* max_sort_char */ | ||
| 5278 | ' ', /* pad char */ | ||
| 5279 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5280 | 1, /* levels_for_compare */ | ||
| 5281 | &my_charset_ucs2_handler, | ||
| 5282 | &my_collation_ucs2_uca_handler, | ||
| 5283 | PAD_SPACE}; | ||
| 5284 | |||
| 5285 | CHARSET_INFO my_charset_ucs2_latvian_uca_ci = { | ||
| 5286 | 130, | ||
| 5287 | 0, | ||
| 5288 | 0, /* number */ | ||
| 5289 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, | ||
| 5290 | "ucs2", /* cs name */ | ||
| 5291 | "ucs2_latvian_ci", /* m_coll_name */ | ||
| 5292 | "UCS-2 Unicode", /* comment */ | ||
| 5293 | latvian, /* tailoring */ | ||
| 5294 | nullptr, /* coll_param */ | ||
| 5295 | nullptr, /* ctype */ | ||
| 5296 | nullptr, /* to_lower */ | ||
| 5297 | nullptr, /* to_upper */ | ||
| 5298 | nullptr, /* sort_order */ | ||
| 5299 | nullptr, /* uca */ | ||
| 5300 | nullptr, /* tab_to_uni */ | ||
| 5301 | nullptr, /* tab_from_uni */ | ||
| 5302 | &my_unicase_default, /* caseinfo */ | ||
| 5303 | nullptr, /* state_map */ | ||
| 5304 | nullptr, /* ident_map */ | ||
| 5305 | 8, /* strxfrm_multiply */ | ||
| 5306 | 1, /* caseup_multiply */ | ||
| 5307 | 1, /* casedn_multiply */ | ||
| 5308 | 2, /* mbminlen */ | ||
| 5309 | 2, /* mbmaxlen */ | ||
| 5310 | 1, /* mbmaxlenlen */ | ||
| 5311 | 9, /* min_sort_char */ | ||
| 5312 | 0xFFFF, /* max_sort_char */ | ||
| 5313 | ' ', /* pad char */ | ||
| 5314 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5315 | 1, /* levels_for_compare */ | ||
| 5316 | &my_charset_ucs2_handler, | ||
| 5317 | &my_collation_ucs2_uca_handler, | ||
| 5318 | PAD_SPACE}; | ||
| 5319 | |||
| 5320 | CHARSET_INFO my_charset_ucs2_romanian_uca_ci = { | ||
| 5321 | 131, | ||
| 5322 | 0, | ||
| 5323 | 0, /* number */ | ||
| 5324 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, | ||
| 5325 | "ucs2", /* cs name */ | ||
| 5326 | "ucs2_romanian_ci", /* m_coll_name */ | ||
| 5327 | "UCS-2 Unicode", /* comment */ | ||
| 5328 | romanian, /* tailoring */ | ||
| 5329 | nullptr, /* coll_param */ | ||
| 5330 | nullptr, /* ctype */ | ||
| 5331 | nullptr, /* to_lower */ | ||
| 5332 | nullptr, /* to_upper */ | ||
| 5333 | nullptr, /* sort_order */ | ||
| 5334 | nullptr, /* uca */ | ||
| 5335 | nullptr, /* tab_to_uni */ | ||
| 5336 | nullptr, /* tab_from_uni */ | ||
| 5337 | &my_unicase_default, /* caseinfo */ | ||
| 5338 | nullptr, /* state_map */ | ||
| 5339 | nullptr, /* ident_map */ | ||
| 5340 | 8, /* strxfrm_multiply */ | ||
| 5341 | 1, /* caseup_multiply */ | ||
| 5342 | 1, /* casedn_multiply */ | ||
| 5343 | 2, /* mbminlen */ | ||
| 5344 | 2, /* mbmaxlen */ | ||
| 5345 | 1, /* mbmaxlenlen */ | ||
| 5346 | 9, /* min_sort_char */ | ||
| 5347 | 0xFFFF, /* max_sort_char */ | ||
| 5348 | ' ', /* pad char */ | ||
| 5349 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5350 | 1, /* levels_for_compare */ | ||
| 5351 | &my_charset_ucs2_handler, | ||
| 5352 | &my_collation_ucs2_uca_handler, | ||
| 5353 | PAD_SPACE}; | ||
| 5354 | |||
| 5355 | CHARSET_INFO my_charset_ucs2_slovenian_uca_ci = { | ||
| 5356 | 132, | ||
| 5357 | 0, | ||
| 5358 | 0, /* number */ | ||
| 5359 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, | ||
| 5360 | "ucs2", /* cs name */ | ||
| 5361 | "ucs2_slovenian_ci", /* m_coll_name */ | ||
| 5362 | "UCS-2 Unicode", /* comment */ | ||
| 5363 | slovenian, /* tailoring */ | ||
| 5364 | nullptr, /* coll_param */ | ||
| 5365 | nullptr, /* ctype */ | ||
| 5366 | nullptr, /* to_lower */ | ||
| 5367 | nullptr, /* to_upper */ | ||
| 5368 | nullptr, /* sort_order */ | ||
| 5369 | nullptr, /* uca */ | ||
| 5370 | nullptr, /* tab_to_uni */ | ||
| 5371 | nullptr, /* tab_from_uni */ | ||
| 5372 | &my_unicase_default, /* caseinfo */ | ||
| 5373 | nullptr, /* state_map */ | ||
| 5374 | nullptr, /* ident_map */ | ||
| 5375 | 8, /* strxfrm_multiply */ | ||
| 5376 | 1, /* caseup_multiply */ | ||
| 5377 | 1, /* casedn_multiply */ | ||
| 5378 | 2, /* mbminlen */ | ||
| 5379 | 2, /* mbmaxlen */ | ||
| 5380 | 1, /* mbmaxlenlen */ | ||
| 5381 | 9, /* min_sort_char */ | ||
| 5382 | 0xFFFF, /* max_sort_char */ | ||
| 5383 | ' ', /* pad char */ | ||
| 5384 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5385 | 1, /* levels_for_compare */ | ||
| 5386 | &my_charset_ucs2_handler, | ||
| 5387 | &my_collation_ucs2_uca_handler, | ||
| 5388 | PAD_SPACE}; | ||
| 5389 | |||
| 5390 | CHARSET_INFO my_charset_ucs2_polish_uca_ci = { | ||
| 5391 | 133, | ||
| 5392 | 0, | ||
| 5393 | 0, /* number */ | ||
| 5394 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, | ||
| 5395 | "ucs2", /* cs name */ | ||
| 5396 | "ucs2_polish_ci", /* m_coll_name */ | ||
| 5397 | "UCS-2 Unicode", /* comment */ | ||
| 5398 | polish, /* tailoring */ | ||
| 5399 | nullptr, /* coll_param */ | ||
| 5400 | nullptr, /* ctype */ | ||
| 5401 | nullptr, /* to_lower */ | ||
| 5402 | nullptr, /* to_upper */ | ||
| 5403 | nullptr, /* sort_order */ | ||
| 5404 | nullptr, /* uca */ | ||
| 5405 | nullptr, /* tab_to_uni */ | ||
| 5406 | nullptr, /* tab_from_uni */ | ||
| 5407 | &my_unicase_default, /* caseinfo */ | ||
| 5408 | nullptr, /* state_map */ | ||
| 5409 | nullptr, /* ident_map */ | ||
| 5410 | 8, /* strxfrm_multiply */ | ||
| 5411 | 1, /* caseup_multiply */ | ||
| 5412 | 1, /* casedn_multiply */ | ||
| 5413 | 2, /* mbminlen */ | ||
| 5414 | 2, /* mbmaxlen */ | ||
| 5415 | 1, /* mbmaxlenlen */ | ||
| 5416 | 9, /* min_sort_char */ | ||
| 5417 | 0xFFFF, /* max_sort_char */ | ||
| 5418 | ' ', /* pad char */ | ||
| 5419 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5420 | 1, /* levels_for_compare */ | ||
| 5421 | &my_charset_ucs2_handler, | ||
| 5422 | &my_collation_ucs2_uca_handler, | ||
| 5423 | PAD_SPACE}; | ||
| 5424 | |||
| 5425 | CHARSET_INFO my_charset_ucs2_estonian_uca_ci = { /* "ucs2_estonian_ci": ucs2 collation using the estonian tailoring */ | ||
| 5426 | 134, | ||
| 5427 | 0, | ||
| 5428 | 0, /* number */ | ||
| 5429 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5430 | "ucs2", /* cs name */ | ||
| 5431 | "ucs2_estonian_ci", /* m_coll_name */ | ||
| 5432 | "UCS-2 Unicode", /* comment */ | ||
| 5433 | estonian, /* tailoring */ | ||
| 5434 | nullptr, /* coll_param */ | ||
| 5435 | nullptr, /* ctype */ | ||
| 5436 | nullptr, /* to_lower */ | ||
| 5437 | nullptr, /* to_upper */ | ||
| 5438 | nullptr, /* sort_order */ | ||
| 5439 | nullptr, /* uca */ | ||
| 5440 | nullptr, /* tab_to_uni */ | ||
| 5441 | nullptr, /* tab_from_uni */ | ||
| 5442 | &my_unicase_default, /* caseinfo */ | ||
| 5443 | nullptr, /* state_map */ | ||
| 5444 | nullptr, /* ident_map */ | ||
| 5445 | 8, /* strxfrm_multiply */ | ||
| 5446 | 1, /* caseup_multiply */ | ||
| 5447 | 1, /* casedn_multiply */ | ||
| 5448 | 2, /* mbminlen */ | ||
| 5449 | 2, /* mbmaxlen */ | ||
| 5450 | 1, /* mbmaxlenlen */ | ||
| 5451 | 9, /* min_sort_char */ | ||
| 5452 | 0xFFFF, /* max_sort_char */ | ||
| 5453 | ' ', /* pad char */ | ||
| 5454 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5455 | 1, /* levels_for_compare */ | ||
| 5456 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5457 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5458 | PAD_SPACE}; | ||
| 5459 | |||
| 5460 | CHARSET_INFO my_charset_ucs2_spanish_uca_ci = { /* "ucs2_spanish_ci": ucs2 collation using the spanish tailoring */ | ||
| 5461 | 135, | ||
| 5462 | 0, | ||
| 5463 | 0, /* number */ | ||
| 5464 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5465 | "ucs2", /* cs name */ | ||
| 5466 | "ucs2_spanish_ci", /* m_coll_name */ | ||
| 5467 | "UCS-2 Unicode", /* comment */ | ||
| 5468 | spanish, /* tailoring */ | ||
| 5469 | nullptr, /* coll_param */ | ||
| 5470 | nullptr, /* ctype */ | ||
| 5471 | nullptr, /* to_lower */ | ||
| 5472 | nullptr, /* to_upper */ | ||
| 5473 | nullptr, /* sort_order */ | ||
| 5474 | nullptr, /* uca */ | ||
| 5475 | nullptr, /* tab_to_uni */ | ||
| 5476 | nullptr, /* tab_from_uni */ | ||
| 5477 | &my_unicase_default, /* caseinfo */ | ||
| 5478 | nullptr, /* state_map */ | ||
| 5479 | nullptr, /* ident_map */ | ||
| 5480 | 8, /* strxfrm_multiply */ | ||
| 5481 | 1, /* caseup_multiply */ | ||
| 5482 | 1, /* casedn_multiply */ | ||
| 5483 | 2, /* mbminlen */ | ||
| 5484 | 2, /* mbmaxlen */ | ||
| 5485 | 1, /* mbmaxlenlen */ | ||
| 5486 | 9, /* min_sort_char */ | ||
| 5487 | 0xFFFF, /* max_sort_char */ | ||
| 5488 | ' ', /* pad char */ | ||
| 5489 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5490 | 1, /* levels_for_compare */ | ||
| 5491 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5492 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5493 | PAD_SPACE}; | ||
| 5494 | |||
| 5495 | CHARSET_INFO my_charset_ucs2_swedish_uca_ci = { /* "ucs2_swedish_ci": ucs2 collation using the swedish tailoring */ | ||
| 5496 | 136, | ||
| 5497 | 0, | ||
| 5498 | 0, /* number */ | ||
| 5499 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5500 | "ucs2", /* cs name */ | ||
| 5501 | "ucs2_swedish_ci", /* m_coll_name */ | ||
| 5502 | "UCS-2 Unicode", /* comment */ | ||
| 5503 | swedish, /* tailoring */ | ||
| 5504 | nullptr, /* coll_param */ | ||
| 5505 | nullptr, /* ctype */ | ||
| 5506 | nullptr, /* to_lower */ | ||
| 5507 | nullptr, /* to_upper */ | ||
| 5508 | nullptr, /* sort_order */ | ||
| 5509 | nullptr, /* uca */ | ||
| 5510 | nullptr, /* tab_to_uni */ | ||
| 5511 | nullptr, /* tab_from_uni */ | ||
| 5512 | &my_unicase_default, /* caseinfo */ | ||
| 5513 | nullptr, /* state_map */ | ||
| 5514 | nullptr, /* ident_map */ | ||
| 5515 | 8, /* strxfrm_multiply */ | ||
| 5516 | 1, /* caseup_multiply */ | ||
| 5517 | 1, /* casedn_multiply */ | ||
| 5518 | 2, /* mbminlen */ | ||
| 5519 | 2, /* mbmaxlen */ | ||
| 5520 | 1, /* mbmaxlenlen */ | ||
| 5521 | 9, /* min_sort_char */ | ||
| 5522 | 0xFFFF, /* max_sort_char */ | ||
| 5523 | ' ', /* pad char */ | ||
| 5524 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5525 | 1, /* levels_for_compare */ | ||
| 5526 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5527 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5528 | PAD_SPACE}; | ||
| 5529 | |||
| 5530 | CHARSET_INFO my_charset_ucs2_turkish_uca_ci = { /* "ucs2_turkish_ci": ucs2 collation using the turkish tailoring */ | ||
| 5531 | 137, | ||
| 5532 | 0, | ||
| 5533 | 0, /* number */ | ||
| 5534 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5535 | "ucs2", /* cs name */ | ||
| 5536 | "ucs2_turkish_ci", /* m_coll_name */ | ||
| 5537 | "UCS-2 Unicode", /* comment */ | ||
| 5538 | turkish, /* tailoring */ | ||
| 5539 | nullptr, /* coll_param */ | ||
| 5540 | nullptr, /* ctype */ | ||
| 5541 | nullptr, /* to_lower */ | ||
| 5542 | nullptr, /* to_upper */ | ||
| 5543 | nullptr, /* sort_order */ | ||
| 5544 | nullptr, /* uca */ | ||
| 5545 | nullptr, /* tab_to_uni */ | ||
| 5546 | nullptr, /* tab_from_uni */ | ||
| 5547 | &my_unicase_turkish, /* caseinfo: Turkish-specific table, unlike the sibling ucs2 collations */ | ||
| 5548 | nullptr, /* state_map */ | ||
| 5549 | nullptr, /* ident_map */ | ||
| 5550 | 8, /* strxfrm_multiply */ | ||
| 5551 | 1, /* caseup_multiply */ | ||
| 5552 | 1, /* casedn_multiply */ | ||
| 5553 | 2, /* mbminlen */ | ||
| 5554 | 2, /* mbmaxlen */ | ||
| 5555 | 1, /* mbmaxlenlen */ | ||
| 5556 | 9, /* min_sort_char */ | ||
| 5557 | 0xFFFF, /* max_sort_char */ | ||
| 5558 | ' ', /* pad char */ | ||
| 5559 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5560 | 1, /* levels_for_compare */ | ||
| 5561 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5562 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5563 | PAD_SPACE}; | ||
| 5564 | |||
| 5565 | CHARSET_INFO my_charset_ucs2_czech_uca_ci = { /* "ucs2_czech_ci": ucs2 collation using the czech tailoring */ | ||
| 5566 | 138, | ||
| 5567 | 0, | ||
| 5568 | 0, /* number */ | ||
| 5569 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5570 | "ucs2", /* cs name */ | ||
| 5571 | "ucs2_czech_ci", /* m_coll_name */ | ||
| 5572 | "UCS-2 Unicode", /* comment */ | ||
| 5573 | czech, /* tailoring */ | ||
| 5574 | nullptr, /* coll_param */ | ||
| 5575 | nullptr, /* ctype */ | ||
| 5576 | nullptr, /* to_lower */ | ||
| 5577 | nullptr, /* to_upper */ | ||
| 5578 | nullptr, /* sort_order */ | ||
| 5579 | nullptr, /* uca */ | ||
| 5580 | nullptr, /* tab_to_uni */ | ||
| 5581 | nullptr, /* tab_from_uni */ | ||
| 5582 | &my_unicase_default, /* caseinfo */ | ||
| 5583 | nullptr, /* state_map */ | ||
| 5584 | nullptr, /* ident_map */ | ||
| 5585 | 8, /* strxfrm_multiply */ | ||
| 5586 | 1, /* caseup_multiply */ | ||
| 5587 | 1, /* casedn_multiply */ | ||
| 5588 | 2, /* mbminlen */ | ||
| 5589 | 2, /* mbmaxlen */ | ||
| 5590 | 1, /* mbmaxlenlen */ | ||
| 5591 | 9, /* min_sort_char */ | ||
| 5592 | 0xFFFF, /* max_sort_char */ | ||
| 5593 | ' ', /* pad char */ | ||
| 5594 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5595 | 1, /* levels_for_compare */ | ||
| 5596 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5597 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5598 | PAD_SPACE}; | ||
| 5599 | |||
| 5600 | CHARSET_INFO my_charset_ucs2_danish_uca_ci = { /* "ucs2_danish_ci": ucs2 collation using the danish tailoring */ | ||
| 5601 | 139, | ||
| 5602 | 0, | ||
| 5603 | 0, /* number */ | ||
| 5604 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5605 | "ucs2", /* cs name */ | ||
| 5606 | "ucs2_danish_ci", /* m_coll_name */ | ||
| 5607 | "UCS-2 Unicode", /* comment */ | ||
| 5608 | danish, /* tailoring */ | ||
| 5609 | nullptr, /* coll_param */ | ||
| 5610 | nullptr, /* ctype */ | ||
| 5611 | nullptr, /* to_lower */ | ||
| 5612 | nullptr, /* to_upper */ | ||
| 5613 | nullptr, /* sort_order */ | ||
| 5614 | nullptr, /* uca */ | ||
| 5615 | nullptr, /* tab_to_uni */ | ||
| 5616 | nullptr, /* tab_from_uni */ | ||
| 5617 | &my_unicase_default, /* caseinfo */ | ||
| 5618 | nullptr, /* state_map */ | ||
| 5619 | nullptr, /* ident_map */ | ||
| 5620 | 8, /* strxfrm_multiply */ | ||
| 5621 | 1, /* caseup_multiply */ | ||
| 5622 | 1, /* casedn_multiply */ | ||
| 5623 | 2, /* mbminlen */ | ||
| 5624 | 2, /* mbmaxlen */ | ||
| 5625 | 1, /* mbmaxlenlen */ | ||
| 5626 | 9, /* min_sort_char */ | ||
| 5627 | 0xFFFF, /* max_sort_char */ | ||
| 5628 | ' ', /* pad char */ | ||
| 5629 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5630 | 1, /* levels_for_compare */ | ||
| 5631 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5632 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5633 | PAD_SPACE}; | ||
| 5634 | |||
| 5635 | CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci = { /* "ucs2_lithuanian_ci": ucs2 collation using the lithuanian tailoring */ | ||
| 5636 | 140, | ||
| 5637 | 0, | ||
| 5638 | 0, /* number */ | ||
| 5639 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5640 | "ucs2", /* cs name */ | ||
| 5641 | "ucs2_lithuanian_ci", /* m_coll_name */ | ||
| 5642 | "UCS-2 Unicode", /* comment */ | ||
| 5643 | lithuanian, /* tailoring */ | ||
| 5644 | nullptr, /* coll_param */ | ||
| 5645 | nullptr, /* ctype */ | ||
| 5646 | nullptr, /* to_lower */ | ||
| 5647 | nullptr, /* to_upper */ | ||
| 5648 | nullptr, /* sort_order */ | ||
| 5649 | nullptr, /* uca */ | ||
| 5650 | nullptr, /* tab_to_uni */ | ||
| 5651 | nullptr, /* tab_from_uni */ | ||
| 5652 | &my_unicase_default, /* caseinfo */ | ||
| 5653 | nullptr, /* state_map */ | ||
| 5654 | nullptr, /* ident_map */ | ||
| 5655 | 8, /* strxfrm_multiply */ | ||
| 5656 | 1, /* caseup_multiply */ | ||
| 5657 | 1, /* casedn_multiply */ | ||
| 5658 | 2, /* mbminlen */ | ||
| 5659 | 2, /* mbmaxlen */ | ||
| 5660 | 1, /* mbmaxlenlen */ | ||
| 5661 | 9, /* min_sort_char */ | ||
| 5662 | 0xFFFF, /* max_sort_char */ | ||
| 5663 | ' ', /* pad char */ | ||
| 5664 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5665 | 1, /* levels_for_compare */ | ||
| 5666 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5667 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5668 | PAD_SPACE}; | ||
| 5669 | |||
| 5670 | CHARSET_INFO my_charset_ucs2_slovak_uca_ci = { /* "ucs2_slovak_ci": ucs2 collation using the slovak tailoring */ | ||
| 5671 | 141, | ||
| 5672 | 0, | ||
| 5673 | 0, /* number */ | ||
| 5674 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5675 | "ucs2", /* cs name */ | ||
| 5676 | "ucs2_slovak_ci", /* m_coll_name */ | ||
| 5677 | "UCS-2 Unicode", /* comment */ | ||
| 5678 | slovak, /* tailoring */ | ||
| 5679 | nullptr, /* coll_param */ | ||
| 5680 | nullptr, /* ctype */ | ||
| 5681 | nullptr, /* to_lower */ | ||
| 5682 | nullptr, /* to_upper */ | ||
| 5683 | nullptr, /* sort_order */ | ||
| 5684 | nullptr, /* uca */ | ||
| 5685 | nullptr, /* tab_to_uni */ | ||
| 5686 | nullptr, /* tab_from_uni */ | ||
| 5687 | &my_unicase_default, /* caseinfo */ | ||
| 5688 | nullptr, /* state_map */ | ||
| 5689 | nullptr, /* ident_map */ | ||
| 5690 | 8, /* strxfrm_multiply */ | ||
| 5691 | 1, /* caseup_multiply */ | ||
| 5692 | 1, /* casedn_multiply */ | ||
| 5693 | 2, /* mbminlen */ | ||
| 5694 | 2, /* mbmaxlen */ | ||
| 5695 | 1, /* mbmaxlenlen */ | ||
| 5696 | 9, /* min_sort_char */ | ||
| 5697 | 0xFFFF, /* max_sort_char */ | ||
| 5698 | ' ', /* pad char */ | ||
| 5699 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5700 | 1, /* levels_for_compare */ | ||
| 5701 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5702 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5703 | PAD_SPACE}; | ||
| 5704 | |||
| 5705 | CHARSET_INFO my_charset_ucs2_spanish2_uca_ci = { /* "ucs2_spanish2_ci": ucs2 collation using the spanish2 tailoring */ | ||
| 5706 | 142, | ||
| 5707 | 0, | ||
| 5708 | 0, /* number */ | ||
| 5709 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5710 | "ucs2", /* cs name */ | ||
| 5711 | "ucs2_spanish2_ci", /* m_coll_name */ | ||
| 5712 | "UCS-2 Unicode", /* comment */ | ||
| 5713 | spanish2, /* tailoring */ | ||
| 5714 | nullptr, /* coll_param */ | ||
| 5715 | nullptr, /* ctype */ | ||
| 5716 | nullptr, /* to_lower */ | ||
| 5717 | nullptr, /* to_upper */ | ||
| 5718 | nullptr, /* sort_order */ | ||
| 5719 | nullptr, /* uca */ | ||
| 5720 | nullptr, /* tab_to_uni */ | ||
| 5721 | nullptr, /* tab_from_uni */ | ||
| 5722 | &my_unicase_default, /* caseinfo */ | ||
| 5723 | nullptr, /* state_map */ | ||
| 5724 | nullptr, /* ident_map */ | ||
| 5725 | 8, /* strxfrm_multiply */ | ||
| 5726 | 1, /* caseup_multiply */ | ||
| 5727 | 1, /* casedn_multiply */ | ||
| 5728 | 2, /* mbminlen */ | ||
| 5729 | 2, /* mbmaxlen */ | ||
| 5730 | 1, /* mbmaxlenlen */ | ||
| 5731 | 9, /* min_sort_char */ | ||
| 5732 | 0xFFFF, /* max_sort_char */ | ||
| 5733 | ' ', /* pad char */ | ||
| 5734 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5735 | 1, /* levels_for_compare */ | ||
| 5736 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5737 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5738 | PAD_SPACE}; | ||
| 5739 | |||
| 5740 | CHARSET_INFO my_charset_ucs2_roman_uca_ci = { /* "ucs2_roman_ci": ucs2 collation using the roman tailoring */ | ||
| 5741 | 143, | ||
| 5742 | 0, | ||
| 5743 | 0, /* number */ | ||
| 5744 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5745 | "ucs2", /* cs name */ | ||
| 5746 | "ucs2_roman_ci", /* m_coll_name */ | ||
| 5747 | "UCS-2 Unicode", /* comment */ | ||
| 5748 | roman, /* tailoring */ | ||
| 5749 | nullptr, /* coll_param */ | ||
| 5750 | nullptr, /* ctype */ | ||
| 5751 | nullptr, /* to_lower */ | ||
| 5752 | nullptr, /* to_upper */ | ||
| 5753 | nullptr, /* sort_order */ | ||
| 5754 | nullptr, /* uca */ | ||
| 5755 | nullptr, /* tab_to_uni */ | ||
| 5756 | nullptr, /* tab_from_uni */ | ||
| 5757 | &my_unicase_default, /* caseinfo */ | ||
| 5758 | nullptr, /* state_map */ | ||
| 5759 | nullptr, /* ident_map */ | ||
| 5760 | 8, /* strxfrm_multiply */ | ||
| 5761 | 1, /* caseup_multiply */ | ||
| 5762 | 1, /* casedn_multiply */ | ||
| 5763 | 2, /* mbminlen */ | ||
| 5764 | 2, /* mbmaxlen */ | ||
| 5765 | 1, /* mbmaxlenlen */ | ||
| 5766 | 9, /* min_sort_char */ | ||
| 5767 | 0xFFFF, /* max_sort_char */ | ||
| 5768 | ' ', /* pad char */ | ||
| 5769 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5770 | 1, /* levels_for_compare */ | ||
| 5771 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5772 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5773 | PAD_SPACE}; | ||
| 5774 | |||
| 5775 | CHARSET_INFO my_charset_ucs2_persian_uca_ci = { /* "ucs2_persian_ci": ucs2 collation using the persian tailoring */ | ||
| 5776 | 144, | ||
| 5777 | 0, | ||
| 5778 | 0, /* number */ | ||
| 5779 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5780 | "ucs2", /* cs name */ | ||
| 5781 | "ucs2_persian_ci", /* m_coll_name */ | ||
| 5782 | "UCS-2 Unicode", /* comment */ | ||
| 5783 | persian, /* tailoring */ | ||
| 5784 | nullptr, /* coll_param */ | ||
| 5785 | nullptr, /* ctype */ | ||
| 5786 | nullptr, /* to_lower */ | ||
| 5787 | nullptr, /* to_upper */ | ||
| 5788 | nullptr, /* sort_order */ | ||
| 5789 | nullptr, /* uca */ | ||
| 5790 | nullptr, /* tab_to_uni */ | ||
| 5791 | nullptr, /* tab_from_uni */ | ||
| 5792 | &my_unicase_default, /* caseinfo */ | ||
| 5793 | nullptr, /* state_map */ | ||
| 5794 | nullptr, /* ident_map */ | ||
| 5795 | 8, /* strxfrm_multiply */ | ||
| 5796 | 1, /* caseup_multiply */ | ||
| 5797 | 1, /* casedn_multiply */ | ||
| 5798 | 2, /* mbminlen */ | ||
| 5799 | 2, /* mbmaxlen */ | ||
| 5800 | 1, /* mbmaxlenlen */ | ||
| 5801 | 9, /* min_sort_char */ | ||
| 5802 | 0xFFFF, /* max_sort_char */ | ||
| 5803 | ' ', /* pad char */ | ||
| 5804 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5805 | 1, /* levels_for_compare */ | ||
| 5806 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5807 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5808 | PAD_SPACE}; | ||
| 5809 | |||
| 5810 | CHARSET_INFO my_charset_ucs2_esperanto_uca_ci = { /* "ucs2_esperanto_ci": ucs2 collation using the esperanto tailoring */ | ||
| 5811 | 145, | ||
| 5812 | 0, | ||
| 5813 | 0, /* number */ | ||
| 5814 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5815 | "ucs2", /* cs name */ | ||
| 5816 | "ucs2_esperanto_ci", /* m_coll_name */ | ||
| 5817 | "UCS-2 Unicode", /* comment */ | ||
| 5818 | esperanto, /* tailoring */ | ||
| 5819 | nullptr, /* coll_param */ | ||
| 5820 | nullptr, /* ctype */ | ||
| 5821 | nullptr, /* to_lower */ | ||
| 5822 | nullptr, /* to_upper */ | ||
| 5823 | nullptr, /* sort_order */ | ||
| 5824 | nullptr, /* uca */ | ||
| 5825 | nullptr, /* tab_to_uni */ | ||
| 5826 | nullptr, /* tab_from_uni */ | ||
| 5827 | &my_unicase_default, /* caseinfo */ | ||
| 5828 | nullptr, /* state_map */ | ||
| 5829 | nullptr, /* ident_map */ | ||
| 5830 | 8, /* strxfrm_multiply */ | ||
| 5831 | 1, /* caseup_multiply */ | ||
| 5832 | 1, /* casedn_multiply */ | ||
| 5833 | 2, /* mbminlen */ | ||
| 5834 | 2, /* mbmaxlen */ | ||
| 5835 | 1, /* mbmaxlenlen */ | ||
| 5836 | 9, /* min_sort_char */ | ||
| 5837 | 0xFFFF, /* max_sort_char */ | ||
| 5838 | ' ', /* pad char */ | ||
| 5839 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5840 | 1, /* levels_for_compare */ | ||
| 5841 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5842 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5843 | PAD_SPACE}; | ||
| 5844 | |||
| 5845 | CHARSET_INFO my_charset_ucs2_hungarian_uca_ci = { /* "ucs2_hungarian_ci": ucs2 collation using the hungarian tailoring */ | ||
| 5846 | 146, | ||
| 5847 | 0, | ||
| 5848 | 0, /* number */ | ||
| 5849 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5850 | "ucs2", /* cs name */ | ||
| 5851 | "ucs2_hungarian_ci", /* m_coll_name */ | ||
| 5852 | "UCS-2 Unicode", /* comment */ | ||
| 5853 | hungarian, /* tailoring */ | ||
| 5854 | nullptr, /* coll_param */ | ||
| 5855 | nullptr, /* ctype */ | ||
| 5856 | nullptr, /* to_lower */ | ||
| 5857 | nullptr, /* to_upper */ | ||
| 5858 | nullptr, /* sort_order */ | ||
| 5859 | nullptr, /* uca */ | ||
| 5860 | nullptr, /* tab_to_uni */ | ||
| 5861 | nullptr, /* tab_from_uni */ | ||
| 5862 | &my_unicase_default, /* caseinfo */ | ||
| 5863 | nullptr, /* state_map */ | ||
| 5864 | nullptr, /* ident_map */ | ||
| 5865 | 8, /* strxfrm_multiply */ | ||
| 5866 | 1, /* caseup_multiply */ | ||
| 5867 | 1, /* casedn_multiply */ | ||
| 5868 | 2, /* mbminlen */ | ||
| 5869 | 2, /* mbmaxlen */ | ||
| 5870 | 1, /* mbmaxlenlen */ | ||
| 5871 | 9, /* min_sort_char */ | ||
| 5872 | 0xFFFF, /* max_sort_char */ | ||
| 5873 | ' ', /* pad char */ | ||
| 5874 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5875 | 1, /* levels_for_compare */ | ||
| 5876 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5877 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5878 | PAD_SPACE}; | ||
| 5879 | |||
| 5880 | CHARSET_INFO my_charset_ucs2_sinhala_uca_ci = { /* "ucs2_sinhala_ci": ucs2 collation using the sinhala tailoring */ | ||
| 5881 | 147, | ||
| 5882 | 0, | ||
| 5883 | 0, /* number */ | ||
| 5884 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5885 | "ucs2", /* cs name */ | ||
| 5886 | "ucs2_sinhala_ci", /* m_coll_name */ | ||
| 5887 | "UCS-2 Unicode", /* comment */ | ||
| 5888 | sinhala, /* tailoring */ | ||
| 5889 | nullptr, /* coll_param */ | ||
| 5890 | nullptr, /* ctype */ | ||
| 5891 | nullptr, /* to_lower */ | ||
| 5892 | nullptr, /* to_upper */ | ||
| 5893 | nullptr, /* sort_order */ | ||
| 5894 | nullptr, /* uca */ | ||
| 5895 | nullptr, /* tab_to_uni */ | ||
| 5896 | nullptr, /* tab_from_uni */ | ||
| 5897 | &my_unicase_default, /* caseinfo */ | ||
| 5898 | nullptr, /* state_map */ | ||
| 5899 | nullptr, /* ident_map */ | ||
| 5900 | 8, /* strxfrm_multiply */ | ||
| 5901 | 1, /* caseup_multiply */ | ||
| 5902 | 1, /* casedn_multiply */ | ||
| 5903 | 2, /* mbminlen */ | ||
| 5904 | 2, /* mbmaxlen */ | ||
| 5905 | 1, /* mbmaxlenlen */ | ||
| 5906 | 9, /* min_sort_char */ | ||
| 5907 | 0xFFFF, /* max_sort_char */ | ||
| 5908 | ' ', /* pad char */ | ||
| 5909 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5910 | 1, /* levels_for_compare */ | ||
| 5911 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5912 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5913 | PAD_SPACE}; | ||
| 5914 | |||
| 5915 | CHARSET_INFO my_charset_ucs2_german2_uca_ci = { /* "ucs2_german2_ci": ucs2 collation using the german2 tailoring */ | ||
| 5916 | 148, | ||
| 5917 | 0, | ||
| 5918 | 0, /* number */ | ||
| 5919 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5920 | "ucs2", /* cs name */ | ||
| 5921 | "ucs2_german2_ci", /* m_coll_name */ | ||
| 5922 | "UCS-2 Unicode", /* comment */ | ||
| 5923 | german2, /* tailoring */ | ||
| 5924 | nullptr, /* coll_param */ | ||
| 5925 | nullptr, /* ctype */ | ||
| 5926 | nullptr, /* to_lower */ | ||
| 5927 | nullptr, /* to_upper */ | ||
| 5928 | nullptr, /* sort_order */ | ||
| 5929 | nullptr, /* uca */ | ||
| 5930 | nullptr, /* tab_to_uni */ | ||
| 5931 | nullptr, /* tab_from_uni */ | ||
| 5932 | &my_unicase_default, /* caseinfo */ | ||
| 5933 | nullptr, /* state_map */ | ||
| 5934 | nullptr, /* ident_map */ | ||
| 5935 | 8, /* strxfrm_multiply */ | ||
| 5936 | 1, /* caseup_multiply */ | ||
| 5937 | 1, /* casedn_multiply */ | ||
| 5938 | 2, /* mbminlen */ | ||
| 5939 | 2, /* mbmaxlen */ | ||
| 5940 | 1, /* mbmaxlenlen */ | ||
| 5941 | 9, /* min_sort_char */ | ||
| 5942 | 0xFFFF, /* max_sort_char */ | ||
| 5943 | ' ', /* pad char */ | ||
| 5944 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5945 | 1, /* levels_for_compare */ | ||
| 5946 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5947 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5948 | PAD_SPACE}; | ||
| 5949 | |||
| 5950 | CHARSET_INFO my_charset_ucs2_croatian_uca_ci = { /* "ucs2_croatian_ci": ucs2 collation using the croatian tailoring */ | ||
| 5951 | 149, | ||
| 5952 | 0, | ||
| 5953 | 0, /* number */ | ||
| 5954 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5955 | "ucs2", /* cs name */ | ||
| 5956 | "ucs2_croatian_ci", /* m_coll_name */ | ||
| 5957 | "UCS-2 Unicode", /* comment */ | ||
| 5958 | croatian, /* tailoring */ | ||
| 5959 | nullptr, /* coll_param */ | ||
| 5960 | nullptr, /* ctype */ | ||
| 5961 | nullptr, /* to_lower */ | ||
| 5962 | nullptr, /* to_upper */ | ||
| 5963 | nullptr, /* sort_order */ | ||
| 5964 | nullptr, /* uca */ | ||
| 5965 | nullptr, /* tab_to_uni */ | ||
| 5966 | nullptr, /* tab_from_uni */ | ||
| 5967 | &my_unicase_default, /* caseinfo */ | ||
| 5968 | nullptr, /* state_map */ | ||
| 5969 | nullptr, /* ident_map */ | ||
| 5970 | 8, /* strxfrm_multiply */ | ||
| 5971 | 1, /* caseup_multiply */ | ||
| 5972 | 1, /* casedn_multiply */ | ||
| 5973 | 2, /* mbminlen */ | ||
| 5974 | 2, /* mbmaxlen */ | ||
| 5975 | 1, /* mbmaxlenlen */ | ||
| 5976 | 9, /* min_sort_char */ | ||
| 5977 | 0xFFFF, /* max_sort_char */ | ||
| 5978 | ' ', /* pad char */ | ||
| 5979 | false, /* escape_with_backslash_is_dangerous */ | ||
| 5980 | 1, /* levels_for_compare */ | ||
| 5981 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 5982 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 5983 | PAD_SPACE}; | ||
| 5984 | |||
| 5985 | CHARSET_INFO my_charset_ucs2_unicode_520_ci = { /* "ucs2_unicode_520_ci": ucs2 collation with an empty tailoring string */ | ||
| 5986 | 150, | ||
| 5987 | 0, | ||
| 5988 | 0, /* number */ | ||
| 5989 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 5990 | "ucs2", /* cs name */ | ||
| 5991 | "ucs2_unicode_520_ci", /* m_coll_name */ | ||
| 5992 | "UCS-2 Unicode", /* comment */ | ||
| 5993 | "", /* tailoring */ | ||
| 5994 | nullptr, /* coll_param */ | ||
| 5995 | nullptr, /* ctype */ | ||
| 5996 | nullptr, /* to_lower */ | ||
| 5997 | nullptr, /* to_upper */ | ||
| 5998 | nullptr, /* sort_order */ | ||
| 5999 | &my_uca_v520, /* uca: set explicitly here, whereas the sibling ucs2 collations pass nullptr */ | ||
| 6000 | nullptr, /* tab_to_uni */ | ||
| 6001 | nullptr, /* tab_from_uni */ | ||
| 6002 | &my_unicase_unicode520, /* caseinfo: the unicode520 table rather than my_unicase_default */ | ||
| 6003 | nullptr, /* state_map */ | ||
| 6004 | nullptr, /* ident_map */ | ||
| 6005 | 8, /* strxfrm_multiply */ | ||
| 6006 | 1, /* caseup_multiply */ | ||
| 6007 | 1, /* casedn_multiply */ | ||
| 6008 | 2, /* mbminlen */ | ||
| 6009 | 2, /* mbmaxlen */ | ||
| 6010 | 1, /* mbmaxlenlen */ | ||
| 6011 | 9, /* min_sort_char */ | ||
| 6012 | 0xFFFF, /* max_sort_char */ | ||
| 6013 | ' ', /* pad char */ | ||
| 6014 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6015 | 1, /* levels_for_compare */ | ||
| 6016 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 6017 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 6018 | PAD_SPACE}; | ||
| 6019 | |||
| 6020 | CHARSET_INFO my_charset_ucs2_vietnamese_ci = { /* "ucs2_vietnamese_ci": ucs2 collation using the vietnamese tailoring */ | ||
| 6021 | 151, | ||
| 6022 | 0, | ||
| 6023 | 0, /* number */ | ||
| 6024 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* flags */ | ||
| 6025 | "ucs2", /* cs name */ | ||
| 6026 | "ucs2_vietnamese_ci", /* m_coll_name */ | ||
| 6027 | "UCS-2 Unicode", /* comment */ | ||
| 6028 | vietnamese, /* tailoring */ | ||
| 6029 | nullptr, /* coll_param */ | ||
| 6030 | nullptr, /* ctype */ | ||
| 6031 | nullptr, /* to_lower */ | ||
| 6032 | nullptr, /* to_upper */ | ||
| 6033 | nullptr, /* sort_order */ | ||
| 6034 | nullptr, /* uca */ | ||
| 6035 | nullptr, /* tab_to_uni */ | ||
| 6036 | nullptr, /* tab_from_uni */ | ||
| 6037 | &my_unicase_default, /* caseinfo */ | ||
| 6038 | nullptr, /* state_map */ | ||
| 6039 | nullptr, /* ident_map */ | ||
| 6040 | 8, /* strxfrm_multiply */ | ||
| 6041 | 1, /* caseup_multiply */ | ||
| 6042 | 1, /* casedn_multiply */ | ||
| 6043 | 2, /* mbminlen */ | ||
| 6044 | 2, /* mbmaxlen */ | ||
| 6045 | 1, /* mbmaxlenlen */ | ||
| 6046 | 9, /* min_sort_char */ | ||
| 6047 | 0xFFFF, /* max_sort_char */ | ||
| 6048 | ' ', /* pad char */ | ||
| 6049 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6050 | 1, /* levels_for_compare */ | ||
| 6051 | &my_charset_ucs2_handler, /* charset handler */ | ||
| 6052 | &my_collation_ucs2_uca_handler, /* collation handler */ | ||
| 6053 | PAD_SPACE}; | ||
| 6054 | |||
| 6055 | MY_COLLATION_HANDLER my_collation_any_uca_handler = { /* collation handler referenced by the utf8mb3 CHARSET_INFO definitions in this file */ | ||
| 6056 | my_coll_init_uca, /* init */ | ||
| 6057 | my_coll_uninit_uca, my_strnncoll_any_uca, my_strnncollsp_any_uca, | ||
| 6058 | my_strnxfrm_any_uca, my_strnxfrmlen_simple, my_like_range_mb, | ||
| 6059 | my_wildcmp_uca, my_strcasecmp_uca, my_instr_mb, | ||
| 6060 | my_hash_sort_any_uca, my_propagate_complex}; /* entries are positional; keep their order in sync with MY_COLLATION_HANDLER */ | ||
| 6061 | |||
| 6062 | MY_COLLATION_HANDLER my_collation_uca_900_handler = { /* handler wiring in the *_uca_900 compare, strnxfrm, strnxfrmlen, hash and propagate routines */ | ||
| 6063 | my_coll_init_uca, /* init */ | ||
| 6064 | my_coll_uninit_uca, my_strnncoll_uca_900, my_strnncollsp_uca_900, | ||
| 6065 | my_strnxfrm_uca_900, my_strnxfrmlen_uca_900, my_like_range_mb, | ||
| 6066 | my_wildcmp_uca, my_strcasecmp_uca, my_instr_mb, /* same three entries as my_collation_any_uca_handler */ | ||
| 6067 | my_hash_sort_uca_900, my_propagate_uca_900}; /* entries are positional; keep their order in sync with MY_COLLATION_HANDLER */ | ||
| 6068 | |||
| 6069 | /* | ||
| 6070 | We consider bytes with a code greater than 127 to be letters. | ||
| 6071 | This guarantees that word boundaries work correctly with regular | ||
| 6072 | expressions. Note that there is no need to mark byte 255 as a | ||
| 6073 | letter, because it is an illegal byte in UTF-8. | ||
| 6074 | */ | ||
| 6075 | static const uchar ctype_utf8[] = { /* 257 entries; presumably entry i + 1 describes byte i -- TODO confirm against the ctype lookup */ | ||
| 6076 | 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, | ||
| 6077 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, | ||
| 6078 | 32, 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, | ||
| 6079 | 16, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16, | ||
| 6080 | 16, 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1, | ||
| 6081 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, | ||
| 6082 | 16, 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2, | ||
| 6083 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, | ||
| 6084 | 32, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* after the leading 32, every remaining entry up to the last is 3 (the "letter" marking described above) */ | ||
| 6085 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 6086 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 6087 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 6088 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 6089 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 6090 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 6091 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 6092 | 0}; /* final entry left unmarked: byte 255 is not treated as a letter */ | ||
| 6093 | |||
| 6094 | extern MY_CHARSET_HANDLER my_charset_utf8_handler; | ||
| 6095 | |||
| 6096 | #define MY_CS_UTF8MB3_UCA_FLAGS \ | ||
| 6097 | (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE) /* flag set shared by the utf8mb3 CHARSET_INFO definitions that follow */ | ||
| 6098 | |||
| 6099 | CHARSET_INFO my_charset_utf8_unicode_ci = { /* "utf8mb3_unicode_ci": utf8mb3 collation with an empty tailoring string */ | ||
| 6100 | 192, | ||
| 6101 | 0, | ||
| 6102 | 0, /* number */ | ||
| 6103 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6104 | "utf8mb3", /* cs name */ | ||
| 6105 | "utf8mb3_unicode_ci", /* m_coll_name */ | ||
| 6106 | "UTF-8 Unicode", /* comment: describes the utf8mb3 charset, matching the sibling utf8mb3 collations */ | ||
| 6107 | "", /* tailoring */ | ||
| 6108 | nullptr, /* coll_param */ | ||
| 6109 | ctype_utf8, /* ctype */ | ||
| 6110 | nullptr, /* to_lower */ | ||
| 6111 | nullptr, /* to_upper */ | ||
| 6112 | nullptr, /* sort_order */ | ||
| 6113 | nullptr, /* uca */ | ||
| 6114 | nullptr, /* tab_to_uni */ | ||
| 6115 | nullptr, /* tab_from_uni */ | ||
| 6116 | &my_unicase_default, /* caseinfo */ | ||
| 6117 | nullptr, /* state_map */ | ||
| 6118 | nullptr, /* ident_map */ | ||
| 6119 | 8, /* strxfrm_multiply */ | ||
| 6120 | 1, /* caseup_multiply */ | ||
| 6121 | 1, /* casedn_multiply */ | ||
| 6122 | 1, /* mbminlen */ | ||
| 6123 | 3, /* mbmaxlen */ | ||
| 6124 | 1, /* mbmaxlenlen */ | ||
| 6125 | 9, /* min_sort_char */ | ||
| 6126 | 0xFFFF, /* max_sort_char */ | ||
| 6127 | ' ', /* pad char */ | ||
| 6128 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6129 | 1, /* levels_for_compare */ | ||
| 6130 | &my_charset_utf8_handler, /* charset handler */ | ||
| 6131 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6132 | PAD_SPACE}; | ||
| 6133 | |||
| 6134 | CHARSET_INFO my_charset_utf8_icelandic_uca_ci = { /* "utf8mb3_icelandic_ci": utf8mb3 collation using the icelandic tailoring */ | ||
| 6135 | 193, | ||
| 6136 | 0, | ||
| 6137 | 0, /* number */ | ||
| 6138 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6139 | "utf8mb3", /* cs name */ | ||
| 6140 | "utf8mb3_icelandic_ci", /* m_coll_name */ | ||
| 6141 | "UTF-8 Unicode", /* comment */ | ||
| 6142 | icelandic, /* tailoring */ | ||
| 6143 | nullptr, /* coll_param */ | ||
| 6144 | ctype_utf8, /* ctype */ | ||
| 6145 | nullptr, /* to_lower */ | ||
| 6146 | nullptr, /* to_upper */ | ||
| 6147 | nullptr, /* sort_order */ | ||
| 6148 | nullptr, /* uca */ | ||
| 6149 | nullptr, /* tab_to_uni */ | ||
| 6150 | nullptr, /* tab_from_uni */ | ||
| 6151 | &my_unicase_default, /* caseinfo */ | ||
| 6152 | nullptr, /* state_map */ | ||
| 6153 | nullptr, /* ident_map */ | ||
| 6154 | 8, /* strxfrm_multiply */ | ||
| 6155 | 1, /* caseup_multiply */ | ||
| 6156 | 1, /* casedn_multiply */ | ||
| 6157 | 1, /* mbminlen */ | ||
| 6158 | 3, /* mbmaxlen */ | ||
| 6159 | 1, /* mbmaxlenlen */ | ||
| 6160 | 9, /* min_sort_char */ | ||
| 6161 | 0xFFFF, /* max_sort_char */ | ||
| 6162 | ' ', /* pad char */ | ||
| 6163 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6164 | 1, /* levels_for_compare */ | ||
| 6165 | &my_charset_utf8_handler, /* charset handler */ | ||
| 6166 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6167 | PAD_SPACE}; | ||
| 6168 | |||
| 6169 | CHARSET_INFO my_charset_utf8_latvian_uca_ci = { /* "utf8mb3_latvian_ci": utf8mb3 collation using the latvian tailoring */ | ||
| 6170 | 194, | ||
| 6171 | 0, | ||
| 6172 | 0, /* number */ | ||
| 6173 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6174 | "utf8mb3", /* cs name */ | ||
| 6175 | "utf8mb3_latvian_ci", /* m_coll_name */ | ||
| 6176 | "UTF-8 Unicode", /* comment */ | ||
| 6177 | latvian, /* tailoring */ | ||
| 6178 | nullptr, /* coll_param */ | ||
| 6179 | ctype_utf8, /* ctype */ | ||
| 6180 | nullptr, /* to_lower */ | ||
| 6181 | nullptr, /* to_upper */ | ||
| 6182 | nullptr, /* sort_order */ | ||
| 6183 | nullptr, /* uca */ | ||
| 6184 | nullptr, /* tab_to_uni */ | ||
| 6185 | nullptr, /* tab_from_uni */ | ||
| 6186 | &my_unicase_default, /* caseinfo */ | ||
| 6187 | nullptr, /* state_map */ | ||
| 6188 | nullptr, /* ident_map */ | ||
| 6189 | 8, /* strxfrm_multiply */ | ||
| 6190 | 1, /* caseup_multiply */ | ||
| 6191 | 1, /* casedn_multiply */ | ||
| 6192 | 1, /* mbminlen */ | ||
| 6193 | 3, /* mbmaxlen */ | ||
| 6194 | 1, /* mbmaxlenlen */ | ||
| 6195 | 9, /* min_sort_char */ | ||
| 6196 | 0xFFFF, /* max_sort_char */ | ||
| 6197 | ' ', /* pad char */ | ||
| 6198 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6199 | 1, /* levels_for_compare */ | ||
| 6200 | &my_charset_utf8_handler, /* charset handler */ | ||
| 6201 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6202 | PAD_SPACE}; | ||
| 6203 | |||
| 6204 | CHARSET_INFO my_charset_utf8_romanian_uca_ci = { /* "utf8mb3_romanian_ci": utf8mb3 collation using the romanian tailoring */ | ||
| 6205 | 195, | ||
| 6206 | 0, | ||
| 6207 | 0, /* number */ | ||
| 6208 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6209 | "utf8mb3", /* cs name */ | ||
| 6210 | "utf8mb3_romanian_ci", /* m_coll_name */ | ||
| 6211 | "UTF-8 Unicode", /* comment */ | ||
| 6212 | romanian, /* tailoring */ | ||
| 6213 | nullptr, /* coll_param */ | ||
| 6214 | ctype_utf8, /* ctype */ | ||
| 6215 | nullptr, /* to_lower */ | ||
| 6216 | nullptr, /* to_upper */ | ||
| 6217 | nullptr, /* sort_order */ | ||
| 6218 | nullptr, /* uca */ | ||
| 6219 | nullptr, /* tab_to_uni */ | ||
| 6220 | nullptr, /* tab_from_uni */ | ||
| 6221 | &my_unicase_default, /* caseinfo */ | ||
| 6222 | nullptr, /* state_map */ | ||
| 6223 | nullptr, /* ident_map */ | ||
| 6224 | 8, /* strxfrm_multiply */ | ||
| 6225 | 1, /* caseup_multiply */ | ||
| 6226 | 1, /* casedn_multiply */ | ||
| 6227 | 1, /* mbminlen */ | ||
| 6228 | 3, /* mbmaxlen */ | ||
| 6229 | 1, /* mbmaxlenlen */ | ||
| 6230 | 9, /* min_sort_char */ | ||
| 6231 | 0xFFFF, /* max_sort_char */ | ||
| 6232 | ' ', /* pad char */ | ||
| 6233 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6234 | 1, /* levels_for_compare */ | ||
| 6235 | &my_charset_utf8_handler, /* charset handler */ | ||
| 6236 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6237 | PAD_SPACE}; | ||
| 6238 | |||
| 6239 | CHARSET_INFO my_charset_utf8_slovenian_uca_ci = { /* "utf8mb3_slovenian_ci": utf8mb3 collation using the slovenian tailoring */ | ||
| 6240 | 196, | ||
| 6241 | 0, | ||
| 6242 | 0, /* number */ | ||
| 6243 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6244 | "utf8mb3", /* cs name */ | ||
| 6245 | "utf8mb3_slovenian_ci", /* m_coll_name */ | ||
| 6246 | "UTF-8 Unicode", /* comment */ | ||
| 6247 | slovenian, /* tailoring */ | ||
| 6248 | nullptr, /* coll_param */ | ||
| 6249 | ctype_utf8, /* ctype */ | ||
| 6250 | nullptr, /* to_lower */ | ||
| 6251 | nullptr, /* to_upper */ | ||
| 6252 | nullptr, /* sort_order */ | ||
| 6253 | nullptr, /* uca */ | ||
| 6254 | nullptr, /* tab_to_uni */ | ||
| 6255 | nullptr, /* tab_from_uni */ | ||
| 6256 | &my_unicase_default, /* caseinfo */ | ||
| 6257 | nullptr, /* state_map */ | ||
| 6258 | nullptr, /* ident_map */ | ||
| 6259 | 8, /* strxfrm_multiply */ | ||
| 6260 | 1, /* caseup_multiply */ | ||
| 6261 | 1, /* casedn_multiply */ | ||
| 6262 | 1, /* mbminlen */ | ||
| 6263 | 3, /* mbmaxlen */ | ||
| 6264 | 1, /* mbmaxlenlen */ | ||
| 6265 | 9, /* min_sort_char */ | ||
| 6266 | 0xFFFF, /* max_sort_char */ | ||
| 6267 | ' ', /* pad char */ | ||
| 6268 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6269 | 1, /* levels_for_compare */ | ||
| 6270 | &my_charset_utf8_handler, /* charset handler */ | ||
| 6271 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6272 | PAD_SPACE}; | ||
| 6273 | |||
| 6274 | CHARSET_INFO my_charset_utf8_polish_uca_ci = { /* Descriptor for the "utf8mb3_polish_ci" collation: polish tailoring, PAD_SPACE. */ | ||
| 6275 | 197, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6276 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6277 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6278 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6279 | "utf8mb3", /* character set name */ | ||
| 6280 | "utf8mb3_polish_ci", /* collation name (m_coll_name) */ | ||
| 6281 | "UTF-8 Unicode", /* comment */ | ||
| 6282 | polish, /* tailoring */ | ||
| 6283 | nullptr, /* coll_param */ | ||
| 6284 | ctype_utf8, /* ctype */ | ||
| 6285 | nullptr, /* to_lower */ | ||
| 6286 | nullptr, /* to_upper */ | ||
| 6287 | nullptr, /* sort_order */ | ||
| 6288 | nullptr, /* uca */ | ||
| 6289 | nullptr, /* tab_to_uni */ | ||
| 6290 | nullptr, /* tab_from_uni */ | ||
| 6291 | &my_unicase_default, /* caseinfo */ | ||
| 6292 | nullptr, /* state_map */ | ||
| 6293 | nullptr, /* ident_map */ | ||
| 6294 | 8, /* strxfrm_multiply */ | ||
| 6295 | 1, /* caseup_multiply */ | ||
| 6296 | 1, /* casedn_multiply */ | ||
| 6297 | 1, /* mbminlen */ | ||
| 6298 | 3, /* mbmaxlen */ | ||
| 6299 | 1, /* mbmaxlenlen */ | ||
| 6300 | 9, /* min_sort_char */ | ||
| 6301 | 0xFFFF, /* max_sort_char */ | ||
| 6302 | ' ', /* pad char */ | ||
| 6303 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6304 | 1, /* levels_for_compare */ | ||
| 6305 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6306 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6307 | PAD_SPACE}; /* pad attribute */ | ||
| 6308 | |||
| 6309 | CHARSET_INFO my_charset_utf8_estonian_uca_ci = { /* Descriptor for the "utf8mb3_estonian_ci" collation: estonian tailoring, PAD_SPACE. */ | ||
| 6310 | 198, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6311 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6312 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6313 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6314 | "utf8mb3", /* character set name */ | ||
| 6315 | "utf8mb3_estonian_ci", /* collation name (m_coll_name) */ | ||
| 6316 | "UTF-8 Unicode", /* comment */ | ||
| 6317 | estonian, /* tailoring */ | ||
| 6318 | nullptr, /* coll_param */ | ||
| 6319 | ctype_utf8, /* ctype */ | ||
| 6320 | nullptr, /* to_lower */ | ||
| 6321 | nullptr, /* to_upper */ | ||
| 6322 | nullptr, /* sort_order */ | ||
| 6323 | nullptr, /* uca */ | ||
| 6324 | nullptr, /* tab_to_uni */ | ||
| 6325 | nullptr, /* tab_from_uni */ | ||
| 6326 | &my_unicase_default, /* caseinfo */ | ||
| 6327 | nullptr, /* state_map */ | ||
| 6328 | nullptr, /* ident_map */ | ||
| 6329 | 8, /* strxfrm_multiply */ | ||
| 6330 | 1, /* caseup_multiply */ | ||
| 6331 | 1, /* casedn_multiply */ | ||
| 6332 | 1, /* mbminlen */ | ||
| 6333 | 3, /* mbmaxlen */ | ||
| 6334 | 1, /* mbmaxlenlen */ | ||
| 6335 | 9, /* min_sort_char */ | ||
| 6336 | 0xFFFF, /* max_sort_char */ | ||
| 6337 | ' ', /* pad char */ | ||
| 6338 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6339 | 1, /* levels_for_compare */ | ||
| 6340 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6341 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6342 | PAD_SPACE}; /* pad attribute */ | ||
| 6343 | |||
| 6344 | CHARSET_INFO my_charset_utf8_spanish_uca_ci = { /* Descriptor for the "utf8mb3_spanish_ci" collation: spanish tailoring, PAD_SPACE. */ | ||
| 6345 | 199, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6346 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6347 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6348 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6349 | "utf8mb3", /* character set name */ | ||
| 6350 | "utf8mb3_spanish_ci", /* collation name (m_coll_name) */ | ||
| 6351 | "UTF-8 Unicode", /* comment */ | ||
| 6352 | spanish, /* tailoring */ | ||
| 6353 | nullptr, /* coll_param */ | ||
| 6354 | ctype_utf8, /* ctype */ | ||
| 6355 | nullptr, /* to_lower */ | ||
| 6356 | nullptr, /* to_upper */ | ||
| 6357 | nullptr, /* sort_order */ | ||
| 6358 | nullptr, /* uca */ | ||
| 6359 | nullptr, /* tab_to_uni */ | ||
| 6360 | nullptr, /* tab_from_uni */ | ||
| 6361 | &my_unicase_default, /* caseinfo */ | ||
| 6362 | nullptr, /* state_map */ | ||
| 6363 | nullptr, /* ident_map */ | ||
| 6364 | 8, /* strxfrm_multiply */ | ||
| 6365 | 1, /* caseup_multiply */ | ||
| 6366 | 1, /* casedn_multiply */ | ||
| 6367 | 1, /* mbminlen */ | ||
| 6368 | 3, /* mbmaxlen */ | ||
| 6369 | 1, /* mbmaxlenlen */ | ||
| 6370 | 9, /* min_sort_char */ | ||
| 6371 | 0xFFFF, /* max_sort_char */ | ||
| 6372 | ' ', /* pad char */ | ||
| 6373 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6374 | 1, /* levels_for_compare */ | ||
| 6375 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6376 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6377 | PAD_SPACE}; /* pad attribute */ | ||
| 6378 | |||
| 6379 | CHARSET_INFO my_charset_utf8_swedish_uca_ci = { /* Descriptor for the "utf8mb3_swedish_ci" collation: swedish tailoring, PAD_SPACE. */ | ||
| 6380 | 200, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6381 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6382 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6383 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6384 | "utf8mb3", /* character set name */ | ||
| 6385 | "utf8mb3_swedish_ci", /* collation name (m_coll_name) */ | ||
| 6386 | "UTF-8 Unicode", /* comment */ | ||
| 6387 | swedish, /* tailoring */ | ||
| 6388 | nullptr, /* coll_param */ | ||
| 6389 | ctype_utf8, /* ctype */ | ||
| 6390 | nullptr, /* to_lower */ | ||
| 6391 | nullptr, /* to_upper */ | ||
| 6392 | nullptr, /* sort_order */ | ||
| 6393 | nullptr, /* uca */ | ||
| 6394 | nullptr, /* tab_to_uni */ | ||
| 6395 | nullptr, /* tab_from_uni */ | ||
| 6396 | &my_unicase_default, /* caseinfo */ | ||
| 6397 | nullptr, /* state_map */ | ||
| 6398 | nullptr, /* ident_map */ | ||
| 6399 | 8, /* strxfrm_multiply */ | ||
| 6400 | 1, /* caseup_multiply */ | ||
| 6401 | 1, /* casedn_multiply */ | ||
| 6402 | 1, /* mbminlen */ | ||
| 6403 | 3, /* mbmaxlen */ | ||
| 6404 | 1, /* mbmaxlenlen */ | ||
| 6405 | 9, /* min_sort_char */ | ||
| 6406 | 0xFFFF, /* max_sort_char */ | ||
| 6407 | ' ', /* pad char */ | ||
| 6408 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6409 | 1, /* levels_for_compare */ | ||
| 6410 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6411 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6412 | PAD_SPACE}; /* pad attribute */ | ||
| 6413 | |||
| 6414 | CHARSET_INFO my_charset_utf8_turkish_uca_ci = { /* Descriptor for the "utf8mb3_turkish_ci" collation: turkish tailoring, Turkish caseinfo table, PAD_SPACE. */ | ||
| 6415 | 201, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6416 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6417 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6418 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6419 | "utf8mb3", /* character set name */ | ||
| 6420 | "utf8mb3_turkish_ci", /* collation name (m_coll_name) */ | ||
| 6421 | "UTF-8 Unicode", /* comment */ | ||
| 6422 | turkish, /* tailoring */ | ||
| 6423 | nullptr, /* coll_param */ | ||
| 6424 | ctype_utf8, /* ctype */ | ||
| 6425 | nullptr, /* to_lower */ | ||
| 6426 | nullptr, /* to_upper */ | ||
| 6427 | nullptr, /* sort_order */ | ||
| 6428 | nullptr, /* uca */ | ||
| 6429 | nullptr, /* tab_to_uni */ | ||
| 6430 | nullptr, /* tab_from_uni */ | ||
| 6431 | &my_unicase_turkish, /* caseinfo: my_unicase_turkish rather than the my_unicase_default used by the sibling utf8mb3 entries */ | ||
| 6432 | nullptr, /* state_map */ | ||
| 6433 | nullptr, /* ident_map */ | ||
| 6434 | 8, /* strxfrm_multiply */ | ||
| 6435 | 2, /* caseup_multiply: 2 here, 1 in the sibling utf8mb3 entries */ | ||
| 6436 | 2, /* casedn_multiply: 2 here, 1 in the sibling utf8mb3 entries */ | ||
| 6437 | 1, /* mbminlen */ | ||
| 6438 | 3, /* mbmaxlen */ | ||
| 6439 | 1, /* mbmaxlenlen */ | ||
| 6440 | 9, /* min_sort_char */ | ||
| 6441 | 0xFFFF, /* max_sort_char */ | ||
| 6442 | ' ', /* pad char */ | ||
| 6443 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6444 | 1, /* levels_for_compare */ | ||
| 6445 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6446 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6447 | PAD_SPACE}; /* pad attribute */ | ||
| 6448 | |||
| 6449 | CHARSET_INFO my_charset_utf8_czech_uca_ci = { /* Descriptor for the "utf8mb3_czech_ci" collation: czech tailoring, PAD_SPACE. */ | ||
| 6450 | 202, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6451 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6452 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6453 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6454 | "utf8mb3", /* character set name */ | ||
| 6455 | "utf8mb3_czech_ci", /* collation name (m_coll_name) */ | ||
| 6456 | "UTF-8 Unicode", /* comment */ | ||
| 6457 | czech, /* tailoring */ | ||
| 6458 | nullptr, /* coll_param */ | ||
| 6459 | ctype_utf8, /* ctype */ | ||
| 6460 | nullptr, /* to_lower */ | ||
| 6461 | nullptr, /* to_upper */ | ||
| 6462 | nullptr, /* sort_order */ | ||
| 6463 | nullptr, /* uca */ | ||
| 6464 | nullptr, /* tab_to_uni */ | ||
| 6465 | nullptr, /* tab_from_uni */ | ||
| 6466 | &my_unicase_default, /* caseinfo */ | ||
| 6467 | nullptr, /* state_map */ | ||
| 6468 | nullptr, /* ident_map */ | ||
| 6469 | 8, /* strxfrm_multiply */ | ||
| 6470 | 1, /* caseup_multiply */ | ||
| 6471 | 1, /* casedn_multiply */ | ||
| 6472 | 1, /* mbminlen */ | ||
| 6473 | 3, /* mbmaxlen */ | ||
| 6474 | 1, /* mbmaxlenlen */ | ||
| 6475 | 9, /* min_sort_char */ | ||
| 6476 | 0xFFFF, /* max_sort_char */ | ||
| 6477 | ' ', /* pad char */ | ||
| 6478 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6479 | 1, /* levels_for_compare */ | ||
| 6480 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6481 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6482 | PAD_SPACE}; /* pad attribute */ | ||
| 6483 | |||
| 6484 | CHARSET_INFO my_charset_utf8_danish_uca_ci = { /* Descriptor for the "utf8mb3_danish_ci" collation: danish tailoring, PAD_SPACE. */ | ||
| 6485 | 203, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6486 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6487 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6488 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6489 | "utf8mb3", /* character set name */ | ||
| 6490 | "utf8mb3_danish_ci", /* collation name (m_coll_name) */ | ||
| 6491 | "UTF-8 Unicode", /* comment */ | ||
| 6492 | danish, /* tailoring */ | ||
| 6493 | nullptr, /* coll_param */ | ||
| 6494 | ctype_utf8, /* ctype */ | ||
| 6495 | nullptr, /* to_lower */ | ||
| 6496 | nullptr, /* to_upper */ | ||
| 6497 | nullptr, /* sort_order */ | ||
| 6498 | nullptr, /* uca */ | ||
| 6499 | nullptr, /* tab_to_uni */ | ||
| 6500 | nullptr, /* tab_from_uni */ | ||
| 6501 | &my_unicase_default, /* caseinfo */ | ||
| 6502 | nullptr, /* state_map */ | ||
| 6503 | nullptr, /* ident_map */ | ||
| 6504 | 8, /* strxfrm_multiply */ | ||
| 6505 | 1, /* caseup_multiply */ | ||
| 6506 | 1, /* casedn_multiply */ | ||
| 6507 | 1, /* mbminlen */ | ||
| 6508 | 3, /* mbmaxlen */ | ||
| 6509 | 1, /* mbmaxlenlen */ | ||
| 6510 | 9, /* min_sort_char */ | ||
| 6511 | 0xFFFF, /* max_sort_char */ | ||
| 6512 | ' ', /* pad char */ | ||
| 6513 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6514 | 1, /* levels_for_compare */ | ||
| 6515 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6516 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6517 | PAD_SPACE}; /* pad attribute */ | ||
| 6518 | |||
| 6519 | CHARSET_INFO my_charset_utf8_lithuanian_uca_ci = { /* Descriptor for the "utf8mb3_lithuanian_ci" collation: lithuanian tailoring, PAD_SPACE. */ | ||
| 6520 | 204, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6521 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6522 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6523 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6524 | "utf8mb3", /* character set name */ | ||
| 6525 | "utf8mb3_lithuanian_ci", /* collation name (m_coll_name) */ | ||
| 6526 | "UTF-8 Unicode", /* comment */ | ||
| 6527 | lithuanian, /* tailoring */ | ||
| 6528 | nullptr, /* coll_param */ | ||
| 6529 | ctype_utf8, /* ctype */ | ||
| 6530 | nullptr, /* to_lower */ | ||
| 6531 | nullptr, /* to_upper */ | ||
| 6532 | nullptr, /* sort_order */ | ||
| 6533 | nullptr, /* uca */ | ||
| 6534 | nullptr, /* tab_to_uni */ | ||
| 6535 | nullptr, /* tab_from_uni */ | ||
| 6536 | &my_unicase_default, /* caseinfo */ | ||
| 6537 | nullptr, /* state_map */ | ||
| 6538 | nullptr, /* ident_map */ | ||
| 6539 | 8, /* strxfrm_multiply */ | ||
| 6540 | 1, /* caseup_multiply */ | ||
| 6541 | 1, /* casedn_multiply */ | ||
| 6542 | 1, /* mbminlen */ | ||
| 6543 | 3, /* mbmaxlen */ | ||
| 6544 | 1, /* mbmaxlenlen */ | ||
| 6545 | 9, /* min_sort_char */ | ||
| 6546 | 0xFFFF, /* max_sort_char */ | ||
| 6547 | ' ', /* pad char */ | ||
| 6548 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6549 | 1, /* levels_for_compare */ | ||
| 6550 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6551 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6552 | PAD_SPACE}; /* pad attribute */ | ||
| 6553 | |||
| 6554 | CHARSET_INFO my_charset_utf8_slovak_uca_ci = { /* Descriptor for the "utf8mb3_slovak_ci" collation: slovak tailoring, PAD_SPACE. */ | ||
| 6555 | 205, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6556 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6557 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6558 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6559 | "utf8mb3", /* character set name */ | ||
| 6560 | "utf8mb3_slovak_ci", /* collation name (m_coll_name) */ | ||
| 6561 | "UTF-8 Unicode", /* comment */ | ||
| 6562 | slovak, /* tailoring */ | ||
| 6563 | nullptr, /* coll_param */ | ||
| 6564 | ctype_utf8, /* ctype */ | ||
| 6565 | nullptr, /* to_lower */ | ||
| 6566 | nullptr, /* to_upper */ | ||
| 6567 | nullptr, /* sort_order */ | ||
| 6568 | nullptr, /* uca */ | ||
| 6569 | nullptr, /* tab_to_uni */ | ||
| 6570 | nullptr, /* tab_from_uni */ | ||
| 6571 | &my_unicase_default, /* caseinfo */ | ||
| 6572 | nullptr, /* state_map */ | ||
| 6573 | nullptr, /* ident_map */ | ||
| 6574 | 8, /* strxfrm_multiply */ | ||
| 6575 | 1, /* caseup_multiply */ | ||
| 6576 | 1, /* casedn_multiply */ | ||
| 6577 | 1, /* mbminlen */ | ||
| 6578 | 3, /* mbmaxlen */ | ||
| 6579 | 1, /* mbmaxlenlen */ | ||
| 6580 | 9, /* min_sort_char */ | ||
| 6581 | 0xFFFF, /* max_sort_char */ | ||
| 6582 | ' ', /* pad char */ | ||
| 6583 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6584 | 1, /* levels_for_compare */ | ||
| 6585 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6586 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6587 | PAD_SPACE}; /* pad attribute */ | ||
| 6588 | |||
| 6589 | CHARSET_INFO my_charset_utf8_spanish2_uca_ci = { /* Descriptor for the "utf8mb3_spanish2_ci" collation: spanish2 tailoring, PAD_SPACE. */ | ||
| 6590 | 206, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6591 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6592 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6593 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6594 | "utf8mb3", /* character set name */ | ||
| 6595 | "utf8mb3_spanish2_ci", /* collation name (m_coll_name) */ | ||
| 6596 | "UTF-8 Unicode", /* comment */ | ||
| 6597 | spanish2, /* tailoring */ | ||
| 6598 | nullptr, /* coll_param */ | ||
| 6599 | ctype_utf8, /* ctype */ | ||
| 6600 | nullptr, /* to_lower */ | ||
| 6601 | nullptr, /* to_upper */ | ||
| 6602 | nullptr, /* sort_order */ | ||
| 6603 | nullptr, /* uca */ | ||
| 6604 | nullptr, /* tab_to_uni */ | ||
| 6605 | nullptr, /* tab_from_uni */ | ||
| 6606 | &my_unicase_default, /* caseinfo */ | ||
| 6607 | nullptr, /* state_map */ | ||
| 6608 | nullptr, /* ident_map */ | ||
| 6609 | 8, /* strxfrm_multiply */ | ||
| 6610 | 1, /* caseup_multiply */ | ||
| 6611 | 1, /* casedn_multiply */ | ||
| 6612 | 1, /* mbminlen */ | ||
| 6613 | 3, /* mbmaxlen */ | ||
| 6614 | 1, /* mbmaxlenlen */ | ||
| 6615 | 9, /* min_sort_char */ | ||
| 6616 | 0xFFFF, /* max_sort_char */ | ||
| 6617 | ' ', /* pad char */ | ||
| 6618 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6619 | 1, /* levels_for_compare */ | ||
| 6620 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6621 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6622 | PAD_SPACE}; /* pad attribute */ | ||
| 6623 | |||
| 6624 | CHARSET_INFO my_charset_utf8_roman_uca_ci = { /* Descriptor for the "utf8mb3_roman_ci" collation: roman tailoring, PAD_SPACE. */ | ||
| 6625 | 207, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6626 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6627 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6628 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6629 | "utf8mb3", /* character set name */ | ||
| 6630 | "utf8mb3_roman_ci", /* collation name (m_coll_name) */ | ||
| 6631 | "UTF-8 Unicode", /* comment */ | ||
| 6632 | roman, /* tailoring */ | ||
| 6633 | nullptr, /* coll_param */ | ||
| 6634 | ctype_utf8, /* ctype */ | ||
| 6635 | nullptr, /* to_lower */ | ||
| 6636 | nullptr, /* to_upper */ | ||
| 6637 | nullptr, /* sort_order */ | ||
| 6638 | nullptr, /* uca */ | ||
| 6639 | nullptr, /* tab_to_uni */ | ||
| 6640 | nullptr, /* tab_from_uni */ | ||
| 6641 | &my_unicase_default, /* caseinfo */ | ||
| 6642 | nullptr, /* state_map */ | ||
| 6643 | nullptr, /* ident_map */ | ||
| 6644 | 8, /* strxfrm_multiply */ | ||
| 6645 | 1, /* caseup_multiply */ | ||
| 6646 | 1, /* casedn_multiply */ | ||
| 6647 | 1, /* mbminlen */ | ||
| 6648 | 3, /* mbmaxlen */ | ||
| 6649 | 1, /* mbmaxlenlen */ | ||
| 6650 | 9, /* min_sort_char */ | ||
| 6651 | 0xFFFF, /* max_sort_char */ | ||
| 6652 | ' ', /* pad char */ | ||
| 6653 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6654 | 1, /* levels_for_compare */ | ||
| 6655 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6656 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6657 | PAD_SPACE}; /* pad attribute */ | ||
| 6658 | |||
| 6659 | CHARSET_INFO my_charset_utf8_persian_uca_ci = { /* Descriptor for the "utf8mb3_persian_ci" collation: persian tailoring, PAD_SPACE. */ | ||
| 6660 | 208, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6661 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6662 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6663 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6664 | "utf8mb3", /* character set name */ | ||
| 6665 | "utf8mb3_persian_ci", /* collation name (m_coll_name) */ | ||
| 6666 | "UTF-8 Unicode", /* comment */ | ||
| 6667 | persian, /* tailoring */ | ||
| 6668 | nullptr, /* coll_param */ | ||
| 6669 | ctype_utf8, /* ctype */ | ||
| 6670 | nullptr, /* to_lower */ | ||
| 6671 | nullptr, /* to_upper */ | ||
| 6672 | nullptr, /* sort_order */ | ||
| 6673 | nullptr, /* uca */ | ||
| 6674 | nullptr, /* tab_to_uni */ | ||
| 6675 | nullptr, /* tab_from_uni */ | ||
| 6676 | &my_unicase_default, /* caseinfo */ | ||
| 6677 | nullptr, /* state_map */ | ||
| 6678 | nullptr, /* ident_map */ | ||
| 6679 | 8, /* strxfrm_multiply */ | ||
| 6680 | 1, /* caseup_multiply */ | ||
| 6681 | 1, /* casedn_multiply */ | ||
| 6682 | 1, /* mbminlen */ | ||
| 6683 | 3, /* mbmaxlen */ | ||
| 6684 | 1, /* mbmaxlenlen */ | ||
| 6685 | 9, /* min_sort_char */ | ||
| 6686 | 0xFFFF, /* max_sort_char */ | ||
| 6687 | ' ', /* pad char */ | ||
| 6688 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6689 | 1, /* levels_for_compare */ | ||
| 6690 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6691 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6692 | PAD_SPACE}; /* pad attribute */ | ||
| 6693 | |||
| 6694 | CHARSET_INFO my_charset_utf8_esperanto_uca_ci = { /* Descriptor for the "utf8mb3_esperanto_ci" collation: esperanto tailoring, PAD_SPACE. */ | ||
| 6695 | 209, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6696 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6697 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6698 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6699 | "utf8mb3", /* character set name */ | ||
| 6700 | "utf8mb3_esperanto_ci", /* collation name (m_coll_name) */ | ||
| 6701 | "UTF-8 Unicode", /* comment */ | ||
| 6702 | esperanto, /* tailoring */ | ||
| 6703 | nullptr, /* coll_param */ | ||
| 6704 | ctype_utf8, /* ctype */ | ||
| 6705 | nullptr, /* to_lower */ | ||
| 6706 | nullptr, /* to_upper */ | ||
| 6707 | nullptr, /* sort_order */ | ||
| 6708 | nullptr, /* uca */ | ||
| 6709 | nullptr, /* tab_to_uni */ | ||
| 6710 | nullptr, /* tab_from_uni */ | ||
| 6711 | &my_unicase_default, /* caseinfo */ | ||
| 6712 | nullptr, /* state_map */ | ||
| 6713 | nullptr, /* ident_map */ | ||
| 6714 | 8, /* strxfrm_multiply */ | ||
| 6715 | 1, /* caseup_multiply */ | ||
| 6716 | 1, /* casedn_multiply */ | ||
| 6717 | 1, /* mbminlen */ | ||
| 6718 | 3, /* mbmaxlen */ | ||
| 6719 | 1, /* mbmaxlenlen */ | ||
| 6720 | 9, /* min_sort_char */ | ||
| 6721 | 0xFFFF, /* max_sort_char */ | ||
| 6722 | ' ', /* pad char */ | ||
| 6723 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6724 | 1, /* levels_for_compare */ | ||
| 6725 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6726 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6727 | PAD_SPACE}; /* pad attribute */ | ||
| 6728 | |||
| 6729 | CHARSET_INFO my_charset_utf8_hungarian_uca_ci = { /* Descriptor for the "utf8mb3_hungarian_ci" collation: hungarian tailoring, PAD_SPACE. */ | ||
| 6730 | 210, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6731 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6732 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6733 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6734 | "utf8mb3", /* character set name */ | ||
| 6735 | "utf8mb3_hungarian_ci", /* collation name (m_coll_name) */ | ||
| 6736 | "UTF-8 Unicode", /* comment */ | ||
| 6737 | hungarian, /* tailoring */ | ||
| 6738 | nullptr, /* coll_param */ | ||
| 6739 | ctype_utf8, /* ctype */ | ||
| 6740 | nullptr, /* to_lower */ | ||
| 6741 | nullptr, /* to_upper */ | ||
| 6742 | nullptr, /* sort_order */ | ||
| 6743 | nullptr, /* uca */ | ||
| 6744 | nullptr, /* tab_to_uni */ | ||
| 6745 | nullptr, /* tab_from_uni */ | ||
| 6746 | &my_unicase_default, /* caseinfo */ | ||
| 6747 | nullptr, /* state_map */ | ||
| 6748 | nullptr, /* ident_map */ | ||
| 6749 | 8, /* strxfrm_multiply */ | ||
| 6750 | 1, /* caseup_multiply */ | ||
| 6751 | 1, /* casedn_multiply */ | ||
| 6752 | 1, /* mbminlen */ | ||
| 6753 | 3, /* mbmaxlen */ | ||
| 6754 | 1, /* mbmaxlenlen */ | ||
| 6755 | 9, /* min_sort_char */ | ||
| 6756 | 0xFFFF, /* max_sort_char */ | ||
| 6757 | ' ', /* pad char */ | ||
| 6758 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6759 | 1, /* levels_for_compare */ | ||
| 6760 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6761 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6762 | PAD_SPACE}; /* pad attribute */ | ||
| 6763 | |||
| 6764 | CHARSET_INFO my_charset_utf8_sinhala_uca_ci = { /* Descriptor for the "utf8mb3_sinhala_ci" collation: sinhala tailoring, PAD_SPACE. */ | ||
| 6765 | 211, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6766 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6767 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6768 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6769 | "utf8mb3", /* character set name */ | ||
| 6770 | "utf8mb3_sinhala_ci", /* collation name (m_coll_name) */ | ||
| 6771 | "UTF-8 Unicode", /* comment */ | ||
| 6772 | sinhala, /* tailoring */ | ||
| 6773 | nullptr, /* coll_param */ | ||
| 6774 | ctype_utf8, /* ctype */ | ||
| 6775 | nullptr, /* to_lower */ | ||
| 6776 | nullptr, /* to_upper */ | ||
| 6777 | nullptr, /* sort_order */ | ||
| 6778 | nullptr, /* uca */ | ||
| 6779 | nullptr, /* tab_to_uni */ | ||
| 6780 | nullptr, /* tab_from_uni */ | ||
| 6781 | &my_unicase_default, /* caseinfo */ | ||
| 6782 | nullptr, /* state_map */ | ||
| 6783 | nullptr, /* ident_map */ | ||
| 6784 | 8, /* strxfrm_multiply */ | ||
| 6785 | 1, /* caseup_multiply */ | ||
| 6786 | 1, /* casedn_multiply */ | ||
| 6787 | 1, /* mbminlen */ | ||
| 6788 | 3, /* mbmaxlen */ | ||
| 6789 | 1, /* mbmaxlenlen */ | ||
| 6790 | 9, /* min_sort_char */ | ||
| 6791 | 0xFFFF, /* max_sort_char */ | ||
| 6792 | ' ', /* pad char */ | ||
| 6793 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6794 | 1, /* levels_for_compare */ | ||
| 6795 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6796 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6797 | PAD_SPACE}; /* pad attribute */ | ||
| 6798 | |||
| 6799 | CHARSET_INFO my_charset_utf8_german2_uca_ci = { /* Descriptor for the "utf8mb3_german2_ci" collation: german2 tailoring, PAD_SPACE. */ | ||
| 6800 | 212, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6801 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6802 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6803 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6804 | "utf8mb3", /* character set name */ | ||
| 6805 | "utf8mb3_german2_ci", /* collation name (m_coll_name) */ | ||
| 6806 | "UTF-8 Unicode", /* comment */ | ||
| 6807 | german2, /* tailoring */ | ||
| 6808 | nullptr, /* coll_param */ | ||
| 6809 | ctype_utf8, /* ctype */ | ||
| 6810 | nullptr, /* to_lower */ | ||
| 6811 | nullptr, /* to_upper */ | ||
| 6812 | nullptr, /* sort_order */ | ||
| 6813 | nullptr, /* uca */ | ||
| 6814 | nullptr, /* tab_to_uni */ | ||
| 6815 | nullptr, /* tab_from_uni */ | ||
| 6816 | &my_unicase_default, /* caseinfo */ | ||
| 6817 | nullptr, /* state_map */ | ||
| 6818 | nullptr, /* ident_map */ | ||
| 6819 | 8, /* strxfrm_multiply */ | ||
| 6820 | 1, /* caseup_multiply */ | ||
| 6821 | 1, /* casedn_multiply */ | ||
| 6822 | 1, /* mbminlen */ | ||
| 6823 | 3, /* mbmaxlen */ | ||
| 6824 | 1, /* mbmaxlenlen */ | ||
| 6825 | 9, /* min_sort_char */ | ||
| 6826 | 0xFFFF, /* max_sort_char */ | ||
| 6827 | ' ', /* pad char */ | ||
| 6828 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6829 | 1, /* levels_for_compare */ | ||
| 6830 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6831 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6832 | PAD_SPACE}; /* pad attribute */ | ||
| 6833 | |||
| 6834 | CHARSET_INFO my_charset_utf8_croatian_uca_ci = { /* Descriptor for the "utf8mb3_croatian_ci" collation: croatian tailoring, PAD_SPACE. */ | ||
| 6835 | 213, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6836 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6837 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6838 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6839 | "utf8mb3", /* character set name */ | ||
| 6840 | "utf8mb3_croatian_ci", /* collation name (m_coll_name) */ | ||
| 6841 | "UTF-8 Unicode", /* comment */ | ||
| 6842 | croatian, /* tailoring */ | ||
| 6843 | nullptr, /* coll_param */ | ||
| 6844 | ctype_utf8, /* ctype */ | ||
| 6845 | nullptr, /* to_lower */ | ||
| 6846 | nullptr, /* to_upper */ | ||
| 6847 | nullptr, /* sort_order */ | ||
| 6848 | nullptr, /* uca */ | ||
| 6849 | nullptr, /* tab_to_uni */ | ||
| 6850 | nullptr, /* tab_from_uni */ | ||
| 6851 | &my_unicase_default, /* caseinfo */ | ||
| 6852 | nullptr, /* state_map */ | ||
| 6853 | nullptr, /* ident_map */ | ||
| 6854 | 8, /* strxfrm_multiply */ | ||
| 6855 | 1, /* caseup_multiply */ | ||
| 6856 | 1, /* casedn_multiply */ | ||
| 6857 | 1, /* mbminlen */ | ||
| 6858 | 3, /* mbmaxlen */ | ||
| 6859 | 1, /* mbmaxlenlen */ | ||
| 6860 | 9, /* min_sort_char */ | ||
| 6861 | 0xFFFF, /* max_sort_char */ | ||
| 6862 | ' ', /* pad char */ | ||
| 6863 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6864 | 1, /* levels_for_compare */ | ||
| 6865 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6866 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6867 | PAD_SPACE}; /* pad attribute */ | ||
| 6868 | |||
| 6869 | CHARSET_INFO my_charset_utf8_unicode_520_ci = { /* Descriptor for the "utf8mb3_unicode_520_ci" collation: empty tailoring, uca = &my_uca_v520, PAD_SPACE. */ | ||
| 6870 | 214, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6871 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6872 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6873 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6874 | "utf8mb3", /* character set name */ | ||
| 6875 | "utf8mb3_unicode_520_ci", /* collation name (m_coll_name) */ | ||
| 6876 | "UTF-8 Unicode", /* comment */ | ||
| 6877 | "", /* tailoring: empty string, unlike the language-specific entries */ | ||
| 6878 | nullptr, /* coll_param */ | ||
| 6879 | ctype_utf8, /* ctype */ | ||
| 6880 | nullptr, /* to_lower */ | ||
| 6881 | nullptr, /* to_upper */ | ||
| 6882 | nullptr, /* sort_order */ | ||
| 6883 | &my_uca_v520, /* uca: set statically here; the neighbouring utf8mb3 entries leave it null */ | ||
| 6884 | nullptr, /* tab_to_uni */ | ||
| 6885 | nullptr, /* tab_from_uni */ | ||
| 6886 | &my_unicase_unicode520, /* caseinfo: my_unicase_unicode520 rather than my_unicase_default */ | ||
| 6887 | nullptr, /* state_map */ | ||
| 6888 | nullptr, /* ident_map */ | ||
| 6889 | 8, /* strxfrm_multiply */ | ||
| 6890 | 1, /* caseup_multiply */ | ||
| 6891 | 1, /* casedn_multiply */ | ||
| 6892 | 1, /* mbminlen */ | ||
| 6893 | 3, /* mbmaxlen */ | ||
| 6894 | 1, /* mbmaxlenlen */ | ||
| 6895 | 9, /* min_sort_char */ | ||
| 6896 | 0xFFFF, /* max_sort_char */ | ||
| 6897 | ' ', /* pad char */ | ||
| 6898 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6899 | 1, /* levels_for_compare */ | ||
| 6900 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6901 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6902 | PAD_SPACE}; /* pad attribute */ | ||
| 6903 | |||
| 6904 | CHARSET_INFO my_charset_utf8_vietnamese_ci = { /* Descriptor for the "utf8mb3_vietnamese_ci" collation: vietnamese tailoring, PAD_SPACE. */ | ||
| 6905 | 215, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6906 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6907 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6908 | MY_CS_UTF8MB3_UCA_FLAGS, /* flags */ | ||
| 6909 | "utf8mb3", /* character set name */ | ||
| 6910 | "utf8mb3_vietnamese_ci", /* collation name (m_coll_name) */ | ||
| 6911 | "UTF-8 Unicode", /* comment */ | ||
| 6912 | vietnamese, /* tailoring */ | ||
| 6913 | nullptr, /* coll_param */ | ||
| 6914 | ctype_utf8, /* ctype */ | ||
| 6915 | nullptr, /* to_lower */ | ||
| 6916 | nullptr, /* to_upper */ | ||
| 6917 | nullptr, /* sort_order */ | ||
| 6918 | nullptr, /* uca */ | ||
| 6919 | nullptr, /* tab_to_uni */ | ||
| 6920 | nullptr, /* tab_from_uni */ | ||
| 6921 | &my_unicase_default, /* caseinfo */ | ||
| 6922 | nullptr, /* state_map */ | ||
| 6923 | nullptr, /* ident_map */ | ||
| 6924 | 8, /* strxfrm_multiply */ | ||
| 6925 | 1, /* caseup_multiply */ | ||
| 6926 | 1, /* casedn_multiply */ | ||
| 6927 | 1, /* mbminlen */ | ||
| 6928 | 3, /* mbmaxlen */ | ||
| 6929 | 1, /* mbmaxlenlen */ | ||
| 6930 | 9, /* min_sort_char */ | ||
| 6931 | 0xFFFF, /* max_sort_char */ | ||
| 6932 | ' ', /* pad char */ | ||
| 6933 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6934 | 1, /* levels_for_compare */ | ||
| 6935 | &my_charset_utf8_handler, /* character set handler */ | ||
| 6936 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6937 | PAD_SPACE}; /* pad attribute */ | ||
| 6938 | |||
| 6939 | extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler; /* declaration only; its address is stored in the utf8mb4 descriptors that follow */ | ||
| 6940 | |||
| 6941 | #define MY_CS_UTF8MB4_UCA_FLAGS \ | ||
| 6942 | (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT) /* OR of four MY_CS_* flags, used as the state/flags field of the utf8mb4 UCA descriptors that follow */ | ||
| 6943 | |||
| 6944 | CHARSET_INFO my_charset_utf8mb4_unicode_ci = { /* Descriptor for the MY_UTF8MB4 "_unicode_ci" collation: empty tailoring, PAD_SPACE. */ | ||
| 6945 | 224, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6946 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6947 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6948 | MY_CS_UTF8MB4_UCA_FLAGS, /* state flags */ | ||
| 6949 | MY_UTF8MB4, /* character set name */ | ||
| 6950 | MY_UTF8MB4 "_unicode_ci", /* collation name (m_coll_name); relies on MY_UTF8MB4 expanding to a string literal */ | ||
| 6951 | "UTF-8 Unicode", /* comment */ | ||
| 6952 | "", /* tailoring: empty string, unlike the language-specific entries */ | ||
| 6953 | nullptr, /* coll_param */ | ||
| 6954 | ctype_utf8, /* ctype */ | ||
| 6955 | nullptr, /* to_lower */ | ||
| 6956 | nullptr, /* to_upper */ | ||
| 6957 | nullptr, /* sort_order */ | ||
| 6958 | nullptr, /* uca */ | ||
| 6959 | nullptr, /* tab_to_uni */ | ||
| 6960 | nullptr, /* tab_from_uni */ | ||
| 6961 | &my_unicase_default, /* caseinfo */ | ||
| 6962 | nullptr, /* state_map */ | ||
| 6963 | nullptr, /* ident_map */ | ||
| 6964 | 8, /* strxfrm_multiply */ | ||
| 6965 | 1, /* caseup_multiply */ | ||
| 6966 | 1, /* casedn_multiply */ | ||
| 6967 | 1, /* mbminlen */ | ||
| 6968 | 4, /* mbmaxlen */ | ||
| 6969 | 1, /* mbmaxlenlen */ | ||
| 6970 | 9, /* min_sort_char */ | ||
| 6971 | 0xFFFF, /* max_sort_char */ | ||
| 6972 | ' ', /* pad char */ | ||
| 6973 | false, /* escape_with_backslash_is_dangerous */ | ||
| 6974 | 1, /* levels_for_compare */ | ||
| 6975 | &my_charset_utf8mb4_handler, /* character set handler */ | ||
| 6976 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 6977 | PAD_SPACE}; /* pad attribute */ | ||
| 6978 | |||
| 6979 | CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci = { /* Descriptor for the MY_UTF8MB4 "_icelandic_ci" collation: icelandic tailoring, PAD_SPACE. */ | ||
| 6980 | 225, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 6981 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 6982 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 6983 | MY_CS_UTF8MB4_UCA_FLAGS, /* state flags */ | ||
| 6984 | MY_UTF8MB4, /* character set name */ | ||
| 6985 | MY_UTF8MB4 "_icelandic_ci", /* collation name (m_coll_name); relies on MY_UTF8MB4 expanding to a string literal */ | ||
| 6986 | "UTF-8 Unicode", /* comment */ | ||
| 6987 | icelandic, /* tailoring */ | ||
| 6988 | nullptr, /* coll_param */ | ||
| 6989 | ctype_utf8, /* ctype */ | ||
| 6990 | nullptr, /* to_lower */ | ||
| 6991 | nullptr, /* to_upper */ | ||
| 6992 | nullptr, /* sort_order */ | ||
| 6993 | nullptr, /* uca */ | ||
| 6994 | nullptr, /* tab_to_uni */ | ||
| 6995 | nullptr, /* tab_from_uni */ | ||
| 6996 | &my_unicase_default, /* caseinfo */ | ||
| 6997 | nullptr, /* state_map */ | ||
| 6998 | nullptr, /* ident_map */ | ||
| 6999 | 8, /* strxfrm_multiply */ | ||
| 7000 | 1, /* caseup_multiply */ | ||
| 7001 | 1, /* casedn_multiply */ | ||
| 7002 | 1, /* mbminlen */ | ||
| 7003 | 4, /* mbmaxlen */ | ||
| 7004 | 1, /* mbmaxlenlen */ | ||
| 7005 | 9, /* min_sort_char */ | ||
| 7006 | 0xFFFF, /* max_sort_char */ | ||
| 7007 | ' ', /* pad char */ | ||
| 7008 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7009 | 1, /* levels_for_compare */ | ||
| 7010 | &my_charset_utf8mb4_handler, /* character set handler */ | ||
| 7011 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7012 | PAD_SPACE}; /* pad attribute */ | ||
| 7013 | |||
| 7014 | CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci = { /* Descriptor for the MY_UTF8MB4 "_latvian_ci" collation: latvian tailoring, PAD_SPACE. */ | ||
| 7015 | 226, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 7016 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 7017 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 7018 | MY_CS_UTF8MB4_UCA_FLAGS, /* state flags */ | ||
| 7019 | MY_UTF8MB4, /* character set name */ | ||
| 7020 | MY_UTF8MB4 "_latvian_ci", /* collation name (m_coll_name); relies on MY_UTF8MB4 expanding to a string literal */ | ||
| 7021 | "UTF-8 Unicode", /* comment */ | ||
| 7022 | latvian, /* tailoring */ | ||
| 7023 | nullptr, /* coll_param */ | ||
| 7024 | ctype_utf8, /* ctype */ | ||
| 7025 | nullptr, /* to_lower */ | ||
| 7026 | nullptr, /* to_upper */ | ||
| 7027 | nullptr, /* sort_order */ | ||
| 7028 | nullptr, /* uca */ | ||
| 7029 | nullptr, /* tab_to_uni */ | ||
| 7030 | nullptr, /* tab_from_uni */ | ||
| 7031 | &my_unicase_default, /* caseinfo */ | ||
| 7032 | nullptr, /* state_map */ | ||
| 7033 | nullptr, /* ident_map */ | ||
| 7034 | 8, /* strxfrm_multiply */ | ||
| 7035 | 1, /* caseup_multiply */ | ||
| 7036 | 1, /* casedn_multiply */ | ||
| 7037 | 1, /* mbminlen */ | ||
| 7038 | 4, /* mbmaxlen */ | ||
| 7039 | 1, /* mbmaxlenlen */ | ||
| 7040 | 9, /* min_sort_char */ | ||
| 7041 | 0xFFFF, /* max_sort_char */ | ||
| 7042 | ' ', /* pad char */ | ||
| 7043 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7044 | 1, /* levels_for_compare */ | ||
| 7045 | &my_charset_utf8mb4_handler, /* character set handler */ | ||
| 7046 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7047 | PAD_SPACE}; /* pad attribute */ | ||
| 7048 | |||
| 7049 | CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci = { /* Descriptor for the MY_UTF8MB4 "_romanian_ci" collation: romanian tailoring, PAD_SPACE. */ | ||
| 7050 | 227, /* number; NOTE(review): presumably the collation id -- confirm */ | ||
| 7051 | 0, /* NOTE(review): presumably primary_number -- confirm */ | ||
| 7052 | 0, /* NOTE(review): presumably binary_number -- confirm */ | ||
| 7053 | MY_CS_UTF8MB4_UCA_FLAGS, /* state flags */ | ||
| 7054 | MY_UTF8MB4, /* character set name */ | ||
| 7055 | MY_UTF8MB4 "_romanian_ci", /* collation name (m_coll_name); relies on MY_UTF8MB4 expanding to a string literal */ | ||
| 7056 | "UTF-8 Unicode", /* comment */ | ||
| 7057 | romanian, /* tailoring */ | ||
| 7058 | nullptr, /* coll_param */ | ||
| 7059 | ctype_utf8, /* ctype */ | ||
| 7060 | nullptr, /* to_lower */ | ||
| 7061 | nullptr, /* to_upper */ | ||
| 7062 | nullptr, /* sort_order */ | ||
| 7063 | nullptr, /* uca */ | ||
| 7064 | nullptr, /* tab_to_uni */ | ||
| 7065 | nullptr, /* tab_from_uni */ | ||
| 7066 | &my_unicase_default, /* caseinfo */ | ||
| 7067 | nullptr, /* state_map */ | ||
| 7068 | nullptr, /* ident_map */ | ||
| 7069 | 8, /* strxfrm_multiply */ | ||
| 7070 | 1, /* caseup_multiply */ | ||
| 7071 | 1, /* casedn_multiply */ | ||
| 7072 | 1, /* mbminlen */ | ||
| 7073 | 4, /* mbmaxlen */ | ||
| 7074 | 1, /* mbmaxlenlen */ | ||
| 7075 | 9, /* min_sort_char */ | ||
| 7076 | 0xFFFF, /* max_sort_char */ | ||
| 7077 | ' ', /* pad char */ | ||
| 7078 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7079 | 1, /* levels_for_compare */ | ||
| 7080 | &my_charset_utf8mb4_handler, /* character set handler */ | ||
| 7081 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7082 | PAD_SPACE}; /* pad attribute */ | ||
| 7083 | |||
| 7084 | CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci = { /* Descriptor for the utf8mb4 "_slovenian_ci" collation (tailoring rules: slovenian). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7085 | 228, /* number */ | ||
| 7086 | 0, /* primary_number */ | ||
| 7087 | 0, /* binary_number */ | ||
| 7088 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7089 | MY_UTF8MB4, /* csname */ | ||
| 7090 | MY_UTF8MB4 "_slovenian_ci", /* m_coll_name */ | ||
| 7091 | "UTF-8 Unicode", /* comment */ | ||
| 7092 | slovenian, /* tailoring */ | ||
| 7093 | nullptr, /* coll_param */ | ||
| 7094 | ctype_utf8, /* ctype */ | ||
| 7095 | nullptr, /* to_lower */ | ||
| 7096 | nullptr, /* to_upper */ | ||
| 7097 | nullptr, /* sort_order */ | ||
| 7098 | nullptr, /* uca */ | ||
| 7099 | nullptr, /* tab_to_uni */ | ||
| 7100 | nullptr, /* tab_from_uni */ | ||
| 7101 | &my_unicase_default, /* caseinfo */ | ||
| 7102 | nullptr, /* state_map */ | ||
| 7103 | nullptr, /* ident_map */ | ||
| 7104 | 8, /* strxfrm_multiply */ | ||
| 7105 | 1, /* caseup_multiply */ | ||
| 7106 | 1, /* casedn_multiply */ | ||
| 7107 | 1, /* mbminlen */ | ||
| 7108 | 4, /* mbmaxlen */ | ||
| 7109 | 1, /* mbmaxlenlen */ | ||
| 7110 | 9, /* min_sort_char */ | ||
| 7111 | 0xFFFF, /* max_sort_char */ | ||
| 7112 | ' ', /* pad char */ | ||
| 7113 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7114 | 1, /* levels_for_compare */ | ||
| 7115 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7116 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7117 | PAD_SPACE}; /* pad attribute */ | ||
| 7118 | |||
| 7119 | CHARSET_INFO my_charset_utf8mb4_polish_uca_ci = { /* Descriptor for the utf8mb4 "_polish_ci" collation (tailoring rules: polish). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7120 | 229, /* number */ | ||
| 7121 | 0, /* primary_number */ | ||
| 7122 | 0, /* binary_number */ | ||
| 7123 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7124 | MY_UTF8MB4, /* csname */ | ||
| 7125 | MY_UTF8MB4 "_polish_ci", /* m_coll_name */ | ||
| 7126 | "UTF-8 Unicode", /* comment */ | ||
| 7127 | polish, /* tailoring */ | ||
| 7128 | nullptr, /* coll_param */ | ||
| 7129 | ctype_utf8, /* ctype */ | ||
| 7130 | nullptr, /* to_lower */ | ||
| 7131 | nullptr, /* to_upper */ | ||
| 7132 | nullptr, /* sort_order */ | ||
| 7133 | nullptr, /* uca */ | ||
| 7134 | nullptr, /* tab_to_uni */ | ||
| 7135 | nullptr, /* tab_from_uni */ | ||
| 7136 | &my_unicase_default, /* caseinfo */ | ||
| 7137 | nullptr, /* state_map */ | ||
| 7138 | nullptr, /* ident_map */ | ||
| 7139 | 8, /* strxfrm_multiply */ | ||
| 7140 | 1, /* caseup_multiply */ | ||
| 7141 | 1, /* casedn_multiply */ | ||
| 7142 | 1, /* mbminlen */ | ||
| 7143 | 4, /* mbmaxlen */ | ||
| 7144 | 1, /* mbmaxlenlen */ | ||
| 7145 | 9, /* min_sort_char */ | ||
| 7146 | 0xFFFF, /* max_sort_char */ | ||
| 7147 | ' ', /* pad char */ | ||
| 7148 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7149 | 1, /* levels_for_compare */ | ||
| 7150 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7151 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7152 | PAD_SPACE}; /* pad attribute */ | ||
| 7153 | |||
| 7154 | CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci = { /* Descriptor for the utf8mb4 "_estonian_ci" collation (tailoring rules: estonian). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7155 | 230, /* number */ | ||
| 7156 | 0, /* primary_number */ | ||
| 7157 | 0, /* binary_number */ | ||
| 7158 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7159 | MY_UTF8MB4, /* csname */ | ||
| 7160 | MY_UTF8MB4 "_estonian_ci", /* m_coll_name */ | ||
| 7161 | "UTF-8 Unicode", /* comment */ | ||
| 7162 | estonian, /* tailoring */ | ||
| 7163 | nullptr, /* coll_param */ | ||
| 7164 | ctype_utf8, /* ctype */ | ||
| 7165 | nullptr, /* to_lower */ | ||
| 7166 | nullptr, /* to_upper */ | ||
| 7167 | nullptr, /* sort_order */ | ||
| 7168 | nullptr, /* uca */ | ||
| 7169 | nullptr, /* tab_to_uni */ | ||
| 7170 | nullptr, /* tab_from_uni */ | ||
| 7171 | &my_unicase_default, /* caseinfo */ | ||
| 7172 | nullptr, /* state_map */ | ||
| 7173 | nullptr, /* ident_map */ | ||
| 7174 | 8, /* strxfrm_multiply */ | ||
| 7175 | 1, /* caseup_multiply */ | ||
| 7176 | 1, /* casedn_multiply */ | ||
| 7177 | 1, /* mbminlen */ | ||
| 7178 | 4, /* mbmaxlen */ | ||
| 7179 | 1, /* mbmaxlenlen */ | ||
| 7180 | 9, /* min_sort_char */ | ||
| 7181 | 0xFFFF, /* max_sort_char */ | ||
| 7182 | ' ', /* pad char */ | ||
| 7183 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7184 | 1, /* levels_for_compare */ | ||
| 7185 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7186 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7187 | PAD_SPACE}; /* pad attribute */ | ||
| 7188 | |||
| 7189 | CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci = { /* Descriptor for the utf8mb4 "_spanish_ci" collation (tailoring rules: spanish). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7190 | 231, /* number */ | ||
| 7191 | 0, /* primary_number */ | ||
| 7192 | 0, /* binary_number */ | ||
| 7193 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7194 | MY_UTF8MB4, /* csname */ | ||
| 7195 | MY_UTF8MB4 "_spanish_ci", /* m_coll_name */ | ||
| 7196 | "UTF-8 Unicode", /* comment */ | ||
| 7197 | spanish, /* tailoring */ | ||
| 7198 | nullptr, /* coll_param */ | ||
| 7199 | ctype_utf8, /* ctype */ | ||
| 7200 | nullptr, /* to_lower */ | ||
| 7201 | nullptr, /* to_upper */ | ||
| 7202 | nullptr, /* sort_order */ | ||
| 7203 | nullptr, /* uca */ | ||
| 7204 | nullptr, /* tab_to_uni */ | ||
| 7205 | nullptr, /* tab_from_uni */ | ||
| 7206 | &my_unicase_default, /* caseinfo */ | ||
| 7207 | nullptr, /* state_map */ | ||
| 7208 | nullptr, /* ident_map */ | ||
| 7209 | 8, /* strxfrm_multiply */ | ||
| 7210 | 1, /* caseup_multiply */ | ||
| 7211 | 1, /* casedn_multiply */ | ||
| 7212 | 1, /* mbminlen */ | ||
| 7213 | 4, /* mbmaxlen */ | ||
| 7214 | 1, /* mbmaxlenlen */ | ||
| 7215 | 9, /* min_sort_char */ | ||
| 7216 | 0xFFFF, /* max_sort_char */ | ||
| 7217 | ' ', /* pad char */ | ||
| 7218 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7219 | 1, /* levels_for_compare */ | ||
| 7220 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7221 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7222 | PAD_SPACE}; /* pad attribute */ | ||
| 7223 | |||
| 7224 | CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci = { /* Descriptor for the utf8mb4 "_swedish_ci" collation (tailoring rules: swedish). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7225 | 232, /* number */ | ||
| 7226 | 0, /* primary_number */ | ||
| 7227 | 0, /* binary_number */ | ||
| 7228 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7229 | MY_UTF8MB4, /* csname */ | ||
| 7230 | MY_UTF8MB4 "_swedish_ci", /* m_coll_name */ | ||
| 7231 | "UTF-8 Unicode", /* comment */ | ||
| 7232 | swedish, /* tailoring */ | ||
| 7233 | nullptr, /* coll_param */ | ||
| 7234 | ctype_utf8, /* ctype */ | ||
| 7235 | nullptr, /* to_lower */ | ||
| 7236 | nullptr, /* to_upper */ | ||
| 7237 | nullptr, /* sort_order */ | ||
| 7238 | nullptr, /* uca */ | ||
| 7239 | nullptr, /* tab_to_uni */ | ||
| 7240 | nullptr, /* tab_from_uni */ | ||
| 7241 | &my_unicase_default, /* caseinfo */ | ||
| 7242 | nullptr, /* state_map */ | ||
| 7243 | nullptr, /* ident_map */ | ||
| 7244 | 8, /* strxfrm_multiply */ | ||
| 7245 | 1, /* caseup_multiply */ | ||
| 7246 | 1, /* casedn_multiply */ | ||
| 7247 | 1, /* mbminlen */ | ||
| 7248 | 4, /* mbmaxlen */ | ||
| 7249 | 1, /* mbmaxlenlen */ | ||
| 7250 | 9, /* min_sort_char */ | ||
| 7251 | 0xFFFF, /* max_sort_char */ | ||
| 7252 | ' ', /* pad char */ | ||
| 7253 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7254 | 1, /* levels_for_compare */ | ||
| 7255 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7256 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7257 | PAD_SPACE}; /* pad attribute */ | ||
| 7258 | |||
| 7259 | CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci = { /* Descriptor for the utf8mb4 "_turkish_ci" collation (tailoring rules: turkish). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7260 | 233, /* number */ | ||
| 7261 | 0, /* primary_number */ | ||
| 7262 | 0, /* binary_number */ | ||
| 7263 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7264 | MY_UTF8MB4, /* csname */ | ||
| 7265 | MY_UTF8MB4 "_turkish_ci", /* m_coll_name */ | ||
| 7266 | "UTF-8 Unicode", /* comment */ | ||
| 7267 | turkish, /* tailoring */ | ||
| 7268 | nullptr, /* coll_param */ | ||
| 7269 | ctype_utf8, /* ctype */ | ||
| 7270 | nullptr, /* to_lower */ | ||
| 7271 | nullptr, /* to_upper */ | ||
| 7272 | nullptr, /* sort_order */ | ||
| 7273 | nullptr, /* uca */ | ||
| 7274 | nullptr, /* tab_to_uni */ | ||
| 7275 | nullptr, /* tab_from_uni */ | ||
| 7276 | &my_unicase_turkish, /* caseinfo (Turkish-specific table; the sibling utf8mb4 entries use my_unicase_default) */ | ||
| 7277 | nullptr, /* state_map */ | ||
| 7278 | nullptr, /* ident_map */ | ||
| 7279 | 8, /* strxfrm_multiply */ | ||
| 7280 | 2, /* caseup_multiply (2 here; the sibling utf8mb4 entries use 1) */ | ||
| 7281 | 2, /* casedn_multiply (2 here; the sibling utf8mb4 entries use 1) */ | ||
| 7282 | 1, /* mbminlen */ | ||
| 7283 | 4, /* mbmaxlen */ | ||
| 7284 | 1, /* mbmaxlenlen */ | ||
| 7285 | 9, /* min_sort_char */ | ||
| 7286 | 0xFFFF, /* max_sort_char */ | ||
| 7287 | ' ', /* pad char */ | ||
| 7288 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7289 | 1, /* levels_for_compare */ | ||
| 7290 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7291 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7292 | PAD_SPACE}; /* pad attribute */ | ||
| 7293 | |||
| 7294 | CHARSET_INFO my_charset_utf8mb4_czech_uca_ci = { /* Descriptor for the utf8mb4 "_czech_ci" collation (tailoring rules: czech). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7295 | 234, /* number */ | ||
| 7296 | 0, /* primary_number */ | ||
| 7297 | 0, /* binary_number */ | ||
| 7298 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7299 | MY_UTF8MB4, /* csname */ | ||
| 7300 | MY_UTF8MB4 "_czech_ci", /* m_coll_name */ | ||
| 7301 | "UTF-8 Unicode", /* comment */ | ||
| 7302 | czech, /* tailoring */ | ||
| 7303 | nullptr, /* coll_param */ | ||
| 7304 | ctype_utf8, /* ctype */ | ||
| 7305 | nullptr, /* to_lower */ | ||
| 7306 | nullptr, /* to_upper */ | ||
| 7307 | nullptr, /* sort_order */ | ||
| 7308 | nullptr, /* uca */ | ||
| 7309 | nullptr, /* tab_to_uni */ | ||
| 7310 | nullptr, /* tab_from_uni */ | ||
| 7311 | &my_unicase_default, /* caseinfo */ | ||
| 7312 | nullptr, /* state_map */ | ||
| 7313 | nullptr, /* ident_map */ | ||
| 7314 | 8, /* strxfrm_multiply */ | ||
| 7315 | 1, /* caseup_multiply */ | ||
| 7316 | 1, /* casedn_multiply */ | ||
| 7317 | 1, /* mbminlen */ | ||
| 7318 | 4, /* mbmaxlen */ | ||
| 7319 | 1, /* mbmaxlenlen */ | ||
| 7320 | 9, /* min_sort_char */ | ||
| 7321 | 0xFFFF, /* max_sort_char */ | ||
| 7322 | ' ', /* pad char */ | ||
| 7323 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7324 | 1, /* levels_for_compare */ | ||
| 7325 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7326 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7327 | PAD_SPACE}; /* pad attribute */ | ||
| 7328 | |||
| 7329 | CHARSET_INFO my_charset_utf8mb4_danish_uca_ci = { /* Descriptor for the utf8mb4 "_danish_ci" collation (tailoring rules: danish). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7330 | 235, /* number */ | ||
| 7331 | 0, /* primary_number */ | ||
| 7332 | 0, /* binary_number */ | ||
| 7333 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7334 | MY_UTF8MB4, /* csname */ | ||
| 7335 | MY_UTF8MB4 "_danish_ci", /* m_coll_name */ | ||
| 7336 | "UTF-8 Unicode", /* comment */ | ||
| 7337 | danish, /* tailoring */ | ||
| 7338 | nullptr, /* coll_param */ | ||
| 7339 | ctype_utf8, /* ctype */ | ||
| 7340 | nullptr, /* to_lower */ | ||
| 7341 | nullptr, /* to_upper */ | ||
| 7342 | nullptr, /* sort_order */ | ||
| 7343 | nullptr, /* uca */ | ||
| 7344 | nullptr, /* tab_to_uni */ | ||
| 7345 | nullptr, /* tab_from_uni */ | ||
| 7346 | &my_unicase_default, /* caseinfo */ | ||
| 7347 | nullptr, /* state_map */ | ||
| 7348 | nullptr, /* ident_map */ | ||
| 7349 | 8, /* strxfrm_multiply */ | ||
| 7350 | 1, /* caseup_multiply */ | ||
| 7351 | 1, /* casedn_multiply */ | ||
| 7352 | 1, /* mbminlen */ | ||
| 7353 | 4, /* mbmaxlen */ | ||
| 7354 | 1, /* mbmaxlenlen */ | ||
| 7355 | 9, /* min_sort_char */ | ||
| 7356 | 0xFFFF, /* max_sort_char */ | ||
| 7357 | ' ', /* pad char */ | ||
| 7358 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7359 | 1, /* levels_for_compare */ | ||
| 7360 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7361 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7362 | PAD_SPACE}; /* pad attribute */ | ||
| 7363 | |||
| 7364 | CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci = { /* Descriptor for the utf8mb4 "_lithuanian_ci" collation (tailoring rules: lithuanian). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7365 | 236, /* number */ | ||
| 7366 | 0, /* primary_number */ | ||
| 7367 | 0, /* binary_number */ | ||
| 7368 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7369 | MY_UTF8MB4, /* csname */ | ||
| 7370 | MY_UTF8MB4 "_lithuanian_ci", /* m_coll_name */ | ||
| 7371 | "UTF-8 Unicode", /* comment */ | ||
| 7372 | lithuanian, /* tailoring */ | ||
| 7373 | nullptr, /* coll_param */ | ||
| 7374 | ctype_utf8, /* ctype */ | ||
| 7375 | nullptr, /* to_lower */ | ||
| 7376 | nullptr, /* to_upper */ | ||
| 7377 | nullptr, /* sort_order */ | ||
| 7378 | nullptr, /* uca */ | ||
| 7379 | nullptr, /* tab_to_uni */ | ||
| 7380 | nullptr, /* tab_from_uni */ | ||
| 7381 | &my_unicase_default, /* caseinfo */ | ||
| 7382 | nullptr, /* state_map */ | ||
| 7383 | nullptr, /* ident_map */ | ||
| 7384 | 8, /* strxfrm_multiply */ | ||
| 7385 | 1, /* caseup_multiply */ | ||
| 7386 | 1, /* casedn_multiply */ | ||
| 7387 | 1, /* mbminlen */ | ||
| 7388 | 4, /* mbmaxlen */ | ||
| 7389 | 1, /* mbmaxlenlen */ | ||
| 7390 | 9, /* min_sort_char */ | ||
| 7391 | 0xFFFF, /* max_sort_char */ | ||
| 7392 | ' ', /* pad char */ | ||
| 7393 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7394 | 1, /* levels_for_compare */ | ||
| 7395 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7396 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7397 | PAD_SPACE}; /* pad attribute */ | ||
| 7398 | |||
| 7399 | CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci = { /* Descriptor for the utf8mb4 "_slovak_ci" collation (tailoring rules: slovak). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7400 | 237, /* number */ | ||
| 7401 | 0, /* primary_number */ | ||
| 7402 | 0, /* binary_number */ | ||
| 7403 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7404 | MY_UTF8MB4, /* csname */ | ||
| 7405 | MY_UTF8MB4 "_slovak_ci", /* m_coll_name */ | ||
| 7406 | "UTF-8 Unicode", /* comment */ | ||
| 7407 | slovak, /* tailoring */ | ||
| 7408 | nullptr, /* coll_param */ | ||
| 7409 | ctype_utf8, /* ctype */ | ||
| 7410 | nullptr, /* to_lower */ | ||
| 7411 | nullptr, /* to_upper */ | ||
| 7412 | nullptr, /* sort_order */ | ||
| 7413 | nullptr, /* uca */ | ||
| 7414 | nullptr, /* tab_to_uni */ | ||
| 7415 | nullptr, /* tab_from_uni */ | ||
| 7416 | &my_unicase_default, /* caseinfo */ | ||
| 7417 | nullptr, /* state_map */ | ||
| 7418 | nullptr, /* ident_map */ | ||
| 7419 | 8, /* strxfrm_multiply */ | ||
| 7420 | 1, /* caseup_multiply */ | ||
| 7421 | 1, /* casedn_multiply */ | ||
| 7422 | 1, /* mbminlen */ | ||
| 7423 | 4, /* mbmaxlen */ | ||
| 7424 | 1, /* mbmaxlenlen */ | ||
| 7425 | 9, /* min_sort_char */ | ||
| 7426 | 0xFFFF, /* max_sort_char */ | ||
| 7427 | ' ', /* pad char */ | ||
| 7428 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7429 | 1, /* levels_for_compare */ | ||
| 7430 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7431 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7432 | PAD_SPACE}; /* pad attribute */ | ||
| 7433 | |||
| 7434 | CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci = { /* Descriptor for the utf8mb4 "_spanish2_ci" collation (tailoring rules: spanish2). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7435 | 238, /* number */ | ||
| 7436 | 0, /* primary_number */ | ||
| 7437 | 0, /* binary_number */ | ||
| 7438 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7439 | MY_UTF8MB4, /* csname */ | ||
| 7440 | MY_UTF8MB4 "_spanish2_ci", /* m_coll_name */ | ||
| 7441 | "UTF-8 Unicode", /* comment */ | ||
| 7442 | spanish2, /* tailoring */ | ||
| 7443 | nullptr, /* coll_param */ | ||
| 7444 | ctype_utf8, /* ctype */ | ||
| 7445 | nullptr, /* to_lower */ | ||
| 7446 | nullptr, /* to_upper */ | ||
| 7447 | nullptr, /* sort_order */ | ||
| 7448 | nullptr, /* uca */ | ||
| 7449 | nullptr, /* tab_to_uni */ | ||
| 7450 | nullptr, /* tab_from_uni */ | ||
| 7451 | &my_unicase_default, /* caseinfo */ | ||
| 7452 | nullptr, /* state_map */ | ||
| 7453 | nullptr, /* ident_map */ | ||
| 7454 | 8, /* strxfrm_multiply */ | ||
| 7455 | 1, /* caseup_multiply */ | ||
| 7456 | 1, /* casedn_multiply */ | ||
| 7457 | 1, /* mbminlen */ | ||
| 7458 | 4, /* mbmaxlen */ | ||
| 7459 | 1, /* mbmaxlenlen */ | ||
| 7460 | 9, /* min_sort_char */ | ||
| 7461 | 0xFFFF, /* max_sort_char */ | ||
| 7462 | ' ', /* pad char */ | ||
| 7463 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7464 | 1, /* levels_for_compare */ | ||
| 7465 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7466 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7467 | PAD_SPACE}; /* pad attribute */ | ||
| 7468 | |||
| 7469 | CHARSET_INFO my_charset_utf8mb4_roman_uca_ci = { /* Descriptor for the utf8mb4 "_roman_ci" collation (tailoring rules: roman). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7470 | 239, /* number */ | ||
| 7471 | 0, /* primary_number */ | ||
| 7472 | 0, /* binary_number */ | ||
| 7473 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7474 | MY_UTF8MB4, /* csname */ | ||
| 7475 | MY_UTF8MB4 "_roman_ci", /* m_coll_name */ | ||
| 7476 | "UTF-8 Unicode", /* comment */ | ||
| 7477 | roman, /* tailoring */ | ||
| 7478 | nullptr, /* coll_param */ | ||
| 7479 | ctype_utf8, /* ctype */ | ||
| 7480 | nullptr, /* to_lower */ | ||
| 7481 | nullptr, /* to_upper */ | ||
| 7482 | nullptr, /* sort_order */ | ||
| 7483 | nullptr, /* uca */ | ||
| 7484 | nullptr, /* tab_to_uni */ | ||
| 7485 | nullptr, /* tab_from_uni */ | ||
| 7486 | &my_unicase_default, /* caseinfo */ | ||
| 7487 | nullptr, /* state_map */ | ||
| 7488 | nullptr, /* ident_map */ | ||
| 7489 | 8, /* strxfrm_multiply */ | ||
| 7490 | 1, /* caseup_multiply */ | ||
| 7491 | 1, /* casedn_multiply */ | ||
| 7492 | 1, /* mbminlen */ | ||
| 7493 | 4, /* mbmaxlen */ | ||
| 7494 | 1, /* mbmaxlenlen */ | ||
| 7495 | 9, /* min_sort_char */ | ||
| 7496 | 0xFFFF, /* max_sort_char */ | ||
| 7497 | ' ', /* pad char */ | ||
| 7498 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7499 | 1, /* levels_for_compare */ | ||
| 7500 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7501 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7502 | PAD_SPACE}; /* pad attribute */ | ||
| 7503 | |||
| 7504 | CHARSET_INFO my_charset_utf8mb4_persian_uca_ci = { /* Descriptor for the utf8mb4 "_persian_ci" collation (tailoring rules: persian). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7505 | 240, /* number */ | ||
| 7506 | 0, /* primary_number */ | ||
| 7507 | 0, /* binary_number */ | ||
| 7508 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7509 | MY_UTF8MB4, /* csname */ | ||
| 7510 | MY_UTF8MB4 "_persian_ci", /* m_coll_name */ | ||
| 7511 | "UTF-8 Unicode", /* comment */ | ||
| 7512 | persian, /* tailoring */ | ||
| 7513 | nullptr, /* coll_param */ | ||
| 7514 | ctype_utf8, /* ctype */ | ||
| 7515 | nullptr, /* to_lower */ | ||
| 7516 | nullptr, /* to_upper */ | ||
| 7517 | nullptr, /* sort_order */ | ||
| 7518 | nullptr, /* uca */ | ||
| 7519 | nullptr, /* tab_to_uni */ | ||
| 7520 | nullptr, /* tab_from_uni */ | ||
| 7521 | &my_unicase_default, /* caseinfo */ | ||
| 7522 | nullptr, /* state_map */ | ||
| 7523 | nullptr, /* ident_map */ | ||
| 7524 | 8, /* strxfrm_multiply */ | ||
| 7525 | 1, /* caseup_multiply */ | ||
| 7526 | 1, /* casedn_multiply */ | ||
| 7527 | 1, /* mbminlen */ | ||
| 7528 | 4, /* mbmaxlen */ | ||
| 7529 | 1, /* mbmaxlenlen */ | ||
| 7530 | 9, /* min_sort_char */ | ||
| 7531 | 0xFFFF, /* max_sort_char */ | ||
| 7532 | ' ', /* pad char */ | ||
| 7533 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7534 | 1, /* levels_for_compare */ | ||
| 7535 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7536 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7537 | PAD_SPACE}; /* pad attribute */ | ||
| 7538 | |||
| 7539 | CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci = { /* Descriptor for the utf8mb4 "_esperanto_ci" collation (tailoring rules: esperanto). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7540 | 241, /* number */ | ||
| 7541 | 0, /* primary_number */ | ||
| 7542 | 0, /* binary_number */ | ||
| 7543 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7544 | MY_UTF8MB4, /* csname */ | ||
| 7545 | MY_UTF8MB4 "_esperanto_ci", /* m_coll_name */ | ||
| 7546 | "UTF-8 Unicode", /* comment */ | ||
| 7547 | esperanto, /* tailoring */ | ||
| 7548 | nullptr, /* coll_param */ | ||
| 7549 | ctype_utf8, /* ctype */ | ||
| 7550 | nullptr, /* to_lower */ | ||
| 7551 | nullptr, /* to_upper */ | ||
| 7552 | nullptr, /* sort_order */ | ||
| 7553 | nullptr, /* uca */ | ||
| 7554 | nullptr, /* tab_to_uni */ | ||
| 7555 | nullptr, /* tab_from_uni */ | ||
| 7556 | &my_unicase_default, /* caseinfo */ | ||
| 7557 | nullptr, /* state_map */ | ||
| 7558 | nullptr, /* ident_map */ | ||
| 7559 | 8, /* strxfrm_multiply */ | ||
| 7560 | 1, /* caseup_multiply */ | ||
| 7561 | 1, /* casedn_multiply */ | ||
| 7562 | 1, /* mbminlen */ | ||
| 7563 | 4, /* mbmaxlen */ | ||
| 7564 | 1, /* mbmaxlenlen */ | ||
| 7565 | 9, /* min_sort_char */ | ||
| 7566 | 0xFFFF, /* max_sort_char */ | ||
| 7567 | ' ', /* pad char */ | ||
| 7568 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7569 | 1, /* levels_for_compare */ | ||
| 7570 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7571 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7572 | PAD_SPACE}; /* pad attribute */ | ||
| 7573 | |||
| 7574 | CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci = { /* Descriptor for the utf8mb4 "_hungarian_ci" collation (tailoring rules: hungarian). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7575 | 242, /* number */ | ||
| 7576 | 0, /* primary_number */ | ||
| 7577 | 0, /* binary_number */ | ||
| 7578 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7579 | MY_UTF8MB4, /* csname */ | ||
| 7580 | MY_UTF8MB4 "_hungarian_ci", /* m_coll_name */ | ||
| 7581 | "UTF-8 Unicode", /* comment */ | ||
| 7582 | hungarian, /* tailoring */ | ||
| 7583 | nullptr, /* coll_param */ | ||
| 7584 | ctype_utf8, /* ctype */ | ||
| 7585 | nullptr, /* to_lower */ | ||
| 7586 | nullptr, /* to_upper */ | ||
| 7587 | nullptr, /* sort_order */ | ||
| 7588 | nullptr, /* uca */ | ||
| 7589 | nullptr, /* tab_to_uni */ | ||
| 7590 | nullptr, /* tab_from_uni */ | ||
| 7591 | &my_unicase_default, /* caseinfo */ | ||
| 7592 | nullptr, /* state_map */ | ||
| 7593 | nullptr, /* ident_map */ | ||
| 7594 | 8, /* strxfrm_multiply */ | ||
| 7595 | 1, /* caseup_multiply */ | ||
| 7596 | 1, /* casedn_multiply */ | ||
| 7597 | 1, /* mbminlen */ | ||
| 7598 | 4, /* mbmaxlen */ | ||
| 7599 | 1, /* mbmaxlenlen */ | ||
| 7600 | 9, /* min_sort_char */ | ||
| 7601 | 0xFFFF, /* max_sort_char */ | ||
| 7602 | ' ', /* pad char */ | ||
| 7603 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7604 | 1, /* levels_for_compare */ | ||
| 7605 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7606 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7607 | PAD_SPACE}; /* pad attribute */ | ||
| 7608 | |||
| 7609 | CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci = { /* Descriptor for the utf8mb4 "_sinhala_ci" collation (tailoring rules: sinhala). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7610 | 243, /* number */ | ||
| 7611 | 0, /* primary_number */ | ||
| 7612 | 0, /* binary_number */ | ||
| 7613 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7614 | MY_UTF8MB4, /* csname */ | ||
| 7615 | MY_UTF8MB4 "_sinhala_ci", /* m_coll_name */ | ||
| 7616 | "UTF-8 Unicode", /* comment */ | ||
| 7617 | sinhala, /* tailoring */ | ||
| 7618 | nullptr, /* coll_param */ | ||
| 7619 | ctype_utf8, /* ctype */ | ||
| 7620 | nullptr, /* to_lower */ | ||
| 7621 | nullptr, /* to_upper */ | ||
| 7622 | nullptr, /* sort_order */ | ||
| 7623 | nullptr, /* uca */ | ||
| 7624 | nullptr, /* tab_to_uni */ | ||
| 7625 | nullptr, /* tab_from_uni */ | ||
| 7626 | &my_unicase_default, /* caseinfo */ | ||
| 7627 | nullptr, /* state_map */ | ||
| 7628 | nullptr, /* ident_map */ | ||
| 7629 | 8, /* strxfrm_multiply */ | ||
| 7630 | 1, /* caseup_multiply */ | ||
| 7631 | 1, /* casedn_multiply */ | ||
| 7632 | 1, /* mbminlen */ | ||
| 7633 | 4, /* mbmaxlen */ | ||
| 7634 | 1, /* mbmaxlenlen */ | ||
| 7635 | 9, /* min_sort_char */ | ||
| 7636 | 0xFFFF, /* max_sort_char */ | ||
| 7637 | ' ', /* pad char */ | ||
| 7638 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7639 | 1, /* levels_for_compare */ | ||
| 7640 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7641 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7642 | PAD_SPACE}; /* pad attribute */ | ||
| 7643 | |||
| 7644 | CHARSET_INFO my_charset_utf8mb4_german2_uca_ci = { /* Descriptor for the utf8mb4 "_german2_ci" collation (tailoring rules: german2). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7645 | 244, /* number */ | ||
| 7646 | 0, /* primary_number */ | ||
| 7647 | 0, /* binary_number */ | ||
| 7648 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7649 | MY_UTF8MB4, /* csname */ | ||
| 7650 | MY_UTF8MB4 "_german2_ci", /* m_coll_name */ | ||
| 7651 | "UTF-8 Unicode", /* comment */ | ||
| 7652 | german2, /* tailoring */ | ||
| 7653 | nullptr, /* coll_param */ | ||
| 7654 | ctype_utf8, /* ctype */ | ||
| 7655 | nullptr, /* to_lower */ | ||
| 7656 | nullptr, /* to_upper */ | ||
| 7657 | nullptr, /* sort_order */ | ||
| 7658 | nullptr, /* uca */ | ||
| 7659 | nullptr, /* tab_to_uni */ | ||
| 7660 | nullptr, /* tab_from_uni */ | ||
| 7661 | &my_unicase_default, /* caseinfo */ | ||
| 7662 | nullptr, /* state_map */ | ||
| 7663 | nullptr, /* ident_map */ | ||
| 7664 | 8, /* strxfrm_multiply */ | ||
| 7665 | 1, /* caseup_multiply */ | ||
| 7666 | 1, /* casedn_multiply */ | ||
| 7667 | 1, /* mbminlen */ | ||
| 7668 | 4, /* mbmaxlen */ | ||
| 7669 | 1, /* mbmaxlenlen */ | ||
| 7670 | 9, /* min_sort_char */ | ||
| 7671 | 0xFFFF, /* max_sort_char */ | ||
| 7672 | ' ', /* pad char */ | ||
| 7673 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7674 | 1, /* levels_for_compare */ | ||
| 7675 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7676 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7677 | PAD_SPACE}; /* pad attribute */ | ||
| 7678 | |||
| 7679 | CHARSET_INFO my_charset_utf8mb4_croatian_uca_ci = { /* Descriptor for the utf8mb4 "_croatian_ci" collation (tailoring rules: croatian). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7680 | 245, /* number */ | ||
| 7681 | 0, /* primary_number */ | ||
| 7682 | 0, /* binary_number */ | ||
| 7683 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7684 | MY_UTF8MB4, /* csname */ | ||
| 7685 | MY_UTF8MB4 "_croatian_ci", /* m_coll_name */ | ||
| 7686 | "UTF-8 Unicode", /* comment */ | ||
| 7687 | croatian, /* tailoring */ | ||
| 7688 | nullptr, /* coll_param */ | ||
| 7689 | ctype_utf8, /* ctype */ | ||
| 7690 | nullptr, /* to_lower */ | ||
| 7691 | nullptr, /* to_upper */ | ||
| 7692 | nullptr, /* sort_order */ | ||
| 7693 | nullptr, /* uca */ | ||
| 7694 | nullptr, /* tab_to_uni */ | ||
| 7695 | nullptr, /* tab_from_uni */ | ||
| 7696 | &my_unicase_default, /* caseinfo */ | ||
| 7697 | nullptr, /* state_map */ | ||
| 7698 | nullptr, /* ident_map */ | ||
| 7699 | 8, /* strxfrm_multiply */ | ||
| 7700 | 1, /* caseup_multiply */ | ||
| 7701 | 1, /* casedn_multiply */ | ||
| 7702 | 1, /* mbminlen */ | ||
| 7703 | 4, /* mbmaxlen */ | ||
| 7704 | 1, /* mbmaxlenlen */ | ||
| 7705 | 9, /* min_sort_char */ | ||
| 7706 | 0xFFFF, /* max_sort_char */ | ||
| 7707 | ' ', /* pad char */ | ||
| 7708 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7709 | 1, /* levels_for_compare */ | ||
| 7710 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7711 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7712 | PAD_SPACE}; /* pad attribute */ | ||
| 7713 | |||
| 7714 | CHARSET_INFO my_charset_utf8mb4_unicode_520_ci = { /* Descriptor for the utf8mb4 "_unicode_520_ci" collation (empty tailoring string, UCA data taken from my_uca_v520). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7715 | 246, /* number */ | ||
| 7716 | 0, /* primary_number */ | ||
| 7717 | 0, /* binary_number */ | ||
| 7718 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7719 | MY_UTF8MB4, /* csname */ | ||
| 7720 | MY_UTF8MB4 "_unicode_520_ci", /* m_coll_name */ | ||
| 7721 | "UTF-8 Unicode", /* comment */ | ||
| 7722 | "", /* tailoring (empty string: no language rules supplied) */ | ||
| 7723 | nullptr, /* coll_param */ | ||
| 7724 | ctype_utf8, /* ctype */ | ||
| 7725 | nullptr, /* to_lower */ | ||
| 7726 | nullptr, /* to_upper */ | ||
| 7727 | nullptr, /* sort_order */ | ||
| 7728 | &my_uca_v520, /* uca (set statically here; the language-specific utf8mb4 entries leave it nullptr) */ | ||
| 7729 | nullptr, /* tab_to_uni */ | ||
| 7730 | nullptr, /* tab_from_uni */ | ||
| 7731 | &my_unicase_unicode520, /* caseinfo (the language-specific utf8mb4 entries use my_unicase_default) */ | ||
| 7732 | nullptr, /* state_map */ | ||
| 7733 | nullptr, /* ident_map */ | ||
| 7734 | 8, /* strxfrm_multiply */ | ||
| 7735 | 1, /* caseup_multiply */ | ||
| 7736 | 1, /* casedn_multiply */ | ||
| 7737 | 1, /* mbminlen */ | ||
| 7738 | 4, /* mbmaxlen */ | ||
| 7739 | 1, /* mbmaxlenlen */ | ||
| 7740 | 9, /* min_sort_char */ | ||
| 7741 | 0x10FFFF, /* max_sort_char (the language-specific utf8mb4 entries use 0xFFFF) */ | ||
| 7742 | ' ', /* pad char */ | ||
| 7743 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7744 | 1, /* levels_for_compare */ | ||
| 7745 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7746 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7747 | PAD_SPACE}; /* pad attribute */ | ||
| 7748 | |||
| 7749 | CHARSET_INFO my_charset_utf8mb4_vietnamese_ci = { /* Descriptor for the utf8mb4 "_vietnamese_ci" collation (tailoring rules: vietnamese). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7750 | 247, /* number */ | ||
| 7751 | 0, /* primary_number */ | ||
| 7752 | 0, /* binary_number */ | ||
| 7753 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 7754 | MY_UTF8MB4, /* csname */ | ||
| 7755 | MY_UTF8MB4 "_vietnamese_ci", /* m_coll_name */ | ||
| 7756 | "UTF-8 Unicode", /* comment */ | ||
| 7757 | vietnamese, /* tailoring */ | ||
| 7758 | nullptr, /* coll_param */ | ||
| 7759 | ctype_utf8, /* ctype */ | ||
| 7760 | nullptr, /* to_lower */ | ||
| 7761 | nullptr, /* to_upper */ | ||
| 7762 | nullptr, /* sort_order */ | ||
| 7763 | nullptr, /* uca */ | ||
| 7764 | nullptr, /* tab_to_uni */ | ||
| 7765 | nullptr, /* tab_from_uni */ | ||
| 7766 | &my_unicase_default, /* caseinfo */ | ||
| 7767 | nullptr, /* state_map */ | ||
| 7768 | nullptr, /* ident_map */ | ||
| 7769 | 8, /* strxfrm_multiply */ | ||
| 7770 | 1, /* caseup_multiply */ | ||
| 7771 | 1, /* casedn_multiply */ | ||
| 7772 | 1, /* mbminlen */ | ||
| 7773 | 4, /* mbmaxlen */ | ||
| 7774 | 1, /* mbmaxlenlen */ | ||
| 7775 | 9, /* min_sort_char */ | ||
| 7776 | 0xFFFF, /* max_sort_char */ | ||
| 7777 | ' ', /* pad char */ | ||
| 7778 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7779 | 1, /* levels_for_compare */ | ||
| 7780 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 7781 | &my_collation_any_uca_handler, /* collation handler */ | ||
| 7782 | PAD_SPACE}; /* pad attribute */ | ||
| 7783 | |||
| 7784 | MY_COLLATION_HANDLER my_collation_utf32_uca_handler = { /* Collation callback table referenced by the utf32 CHARSET_INFO entries in this file. NOTE(review): slot labels after init are inferred from the function names; confirm against the MY_COLLATION_HANDLER declaration. */ | ||
| 7785 | my_coll_init_uca, /* init */ | ||
| 7786 | my_coll_uninit_uca, /* uninit */ | ||
| 7787 | my_strnncoll_any_uca, /* strnncoll */ | ||
| 7788 | my_strnncollsp_any_uca, /* strnncollsp */ | ||
| 7789 | my_strnxfrm_any_uca, /* strnxfrm */ | ||
| 7790 | my_strnxfrmlen_simple, /* strnxfrmlen */ | ||
| 7791 | my_like_range_generic, /* like_range */ | ||
| 7792 | my_wildcmp_uca, /* wildcmp */ | ||
| 7793 | nullptr, /* slot left unset; NOTE(review): presumably strcasecmp, confirm */ | ||
| 7794 | my_instr_mb, /* instr */ | ||
| 7795 | my_hash_sort_any_uca, /* hash_sort */ | ||
| 7796 | my_propagate_complex}; /* propagate */ | ||
| 7797 | |||
| 7798 | extern MY_CHARSET_HANDLER my_charset_utf32_handler; /* declared only; referenced by the utf32 CHARSET_INFO entries below */ | ||
| 7799 | |||
| 7800 | #define MY_CS_UTF32_UCA_FLAGS \ | ||
| 7801 | (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | \ | ||
| 7802 | MY_CS_UNICODE_SUPPLEMENT | MY_CS_NONASCII) /* OR-ed MY_CS_* bits used as the state value of every utf32 UCA CHARSET_INFO entry below */ | ||
| 7803 | |||
| 7804 | CHARSET_INFO my_charset_utf32_unicode_ci = { /* Descriptor for the "utf32_unicode_ci" collation (empty tailoring string). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7805 | 160, /* number */ | ||
| 7806 | 0, /* primary_number */ | ||
| 7807 | 0, /* binary_number */ | ||
| 7808 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 7809 | "utf32", /* csname */ | ||
| 7810 | "utf32_unicode_ci", /* m_coll_name */ | ||
| 7811 | "", /* comment */ | ||
| 7812 | "", /* tailoring (empty string: no language rules supplied) */ | ||
| 7813 | nullptr, /* coll_param */ | ||
| 7814 | nullptr, /* ctype */ | ||
| 7815 | nullptr, /* to_lower */ | ||
| 7816 | nullptr, /* to_upper */ | ||
| 7817 | nullptr, /* sort_order */ | ||
| 7818 | nullptr, /* uca */ | ||
| 7819 | nullptr, /* tab_to_uni */ | ||
| 7820 | nullptr, /* tab_from_uni */ | ||
| 7821 | &my_unicase_default, /* caseinfo */ | ||
| 7822 | nullptr, /* state_map */ | ||
| 7823 | nullptr, /* ident_map */ | ||
| 7824 | 8, /* strxfrm_multiply */ | ||
| 7825 | 1, /* caseup_multiply */ | ||
| 7826 | 1, /* casedn_multiply */ | ||
| 7827 | 4, /* mbminlen */ | ||
| 7828 | 4, /* mbmaxlen */ | ||
| 7829 | 1, /* mbmaxlenlen */ | ||
| 7830 | 9, /* min_sort_char */ | ||
| 7831 | 0xFFFF, /* max_sort_char */ | ||
| 7832 | ' ', /* pad char */ | ||
| 7833 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7834 | 1, /* levels_for_compare */ | ||
| 7835 | &my_charset_utf32_handler, /* charset handler */ | ||
| 7836 | &my_collation_utf32_uca_handler, /* collation handler */ | ||
| 7837 | PAD_SPACE}; /* pad attribute */ | ||
| 7838 | |||
| 7839 | CHARSET_INFO my_charset_utf32_icelandic_uca_ci = { /* Descriptor for the "utf32_icelandic_ci" collation (tailoring rules: icelandic). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7840 | 161, /* number */ | ||
| 7841 | 0, /* primary_number */ | ||
| 7842 | 0, /* binary_number */ | ||
| 7843 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 7844 | "utf32", /* csname */ | ||
| 7845 | "utf32_icelandic_ci", /* m_coll_name */ | ||
| 7846 | "", /* comment */ | ||
| 7847 | icelandic, /* tailoring */ | ||
| 7848 | nullptr, /* coll_param */ | ||
| 7849 | nullptr, /* ctype */ | ||
| 7850 | nullptr, /* to_lower */ | ||
| 7851 | nullptr, /* to_upper */ | ||
| 7852 | nullptr, /* sort_order */ | ||
| 7853 | nullptr, /* uca */ | ||
| 7854 | nullptr, /* tab_to_uni */ | ||
| 7855 | nullptr, /* tab_from_uni */ | ||
| 7856 | &my_unicase_default, /* caseinfo */ | ||
| 7857 | nullptr, /* state_map */ | ||
| 7858 | nullptr, /* ident_map */ | ||
| 7859 | 8, /* strxfrm_multiply */ | ||
| 7860 | 1, /* caseup_multiply */ | ||
| 7861 | 1, /* casedn_multiply */ | ||
| 7862 | 4, /* mbminlen */ | ||
| 7863 | 4, /* mbmaxlen */ | ||
| 7864 | 1, /* mbmaxlenlen */ | ||
| 7865 | 9, /* min_sort_char */ | ||
| 7866 | 0xFFFF, /* max_sort_char */ | ||
| 7867 | ' ', /* pad char */ | ||
| 7868 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7869 | 1, /* levels_for_compare */ | ||
| 7870 | &my_charset_utf32_handler, /* charset handler */ | ||
| 7871 | &my_collation_utf32_uca_handler, /* collation handler */ | ||
| 7872 | PAD_SPACE}; /* pad attribute */ | ||
| 7873 | |||
| 7874 | CHARSET_INFO my_charset_utf32_latvian_uca_ci = { /* Descriptor for the "utf32_latvian_ci" collation (tailoring rules: latvian). NOTE(review): first three labels assume CHARSET_INFO starts with number, primary_number, binary_number; confirm against its declaration. */ | ||
| 7875 | 162, /* number */ | ||
| 7876 | 0, /* primary_number */ | ||
| 7877 | 0, /* binary_number */ | ||
| 7878 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 7879 | "utf32", /* csname */ | ||
| 7880 | "utf32_latvian_ci", /* m_coll_name */ | ||
| 7881 | "", /* comment */ | ||
| 7882 | latvian, /* tailoring */ | ||
| 7883 | nullptr, /* coll_param */ | ||
| 7884 | nullptr, /* ctype */ | ||
| 7885 | nullptr, /* to_lower */ | ||
| 7886 | nullptr, /* to_upper */ | ||
| 7887 | nullptr, /* sort_order */ | ||
| 7888 | nullptr, /* uca */ | ||
| 7889 | nullptr, /* tab_to_uni */ | ||
| 7890 | nullptr, /* tab_from_uni */ | ||
| 7891 | &my_unicase_default, /* caseinfo */ | ||
| 7892 | nullptr, /* state_map */ | ||
| 7893 | nullptr, /* ident_map */ | ||
| 7894 | 8, /* strxfrm_multiply */ | ||
| 7895 | 1, /* caseup_multiply */ | ||
| 7896 | 1, /* casedn_multiply */ | ||
| 7897 | 4, /* mbminlen */ | ||
| 7898 | 4, /* mbmaxlen */ | ||
| 7899 | 1, /* mbmaxlenlen */ | ||
| 7900 | 9, /* min_sort_char */ | ||
| 7901 | 0xFFFF, /* max_sort_char */ | ||
| 7902 | ' ', /* pad char */ | ||
| 7903 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7904 | 1, /* levels_for_compare */ | ||
| 7905 | &my_charset_utf32_handler, /* charset handler */ | ||
| 7906 | &my_collation_utf32_uca_handler, /* collation handler */ | ||
| 7907 | PAD_SPACE}; /* pad attribute */ | ||
| 7908 | |||
| 7909 | CHARSET_INFO my_charset_utf32_romanian_uca_ci = { | ||
| 7910 | 163, | ||
| 7911 | 0, | ||
| 7912 | 0, /* number */ | ||
| 7913 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 7914 | "utf32", /* csname */ | ||
| 7915 | "utf32_romanian_ci", /* m_coll_name */ | ||
| 7916 | "", /* comment */ | ||
| 7917 | romanian, /* tailoring */ | ||
| 7918 | nullptr, /* coll_param */ | ||
| 7919 | nullptr, /* ctype */ | ||
| 7920 | nullptr, /* to_lower */ | ||
| 7921 | nullptr, /* to_upper */ | ||
| 7922 | nullptr, /* sort_order */ | ||
| 7923 | nullptr, /* uca */ | ||
| 7924 | nullptr, /* tab_to_uni */ | ||
| 7925 | nullptr, /* tab_from_uni */ | ||
| 7926 | &my_unicase_default, /* caseinfo */ | ||
| 7927 | nullptr, /* state_map */ | ||
| 7928 | nullptr, /* ident_map */ | ||
| 7929 | 8, /* strxfrm_multiply */ | ||
| 7930 | 1, /* caseup_multiply */ | ||
| 7931 | 1, /* casedn_multiply */ | ||
| 7932 | 4, /* mbminlen */ | ||
| 7933 | 4, /* mbmaxlen */ | ||
| 7934 | 1, /* mbmaxlenlen */ | ||
| 7935 | 9, /* min_sort_char */ | ||
| 7936 | 0xFFFF, /* max_sort_char */ | ||
| 7937 | ' ', /* pad char */ | ||
| 7938 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7939 | 1, /* levels_for_compare */ | ||
| 7940 | &my_charset_utf32_handler, | ||
| 7941 | &my_collation_utf32_uca_handler, | ||
| 7942 | PAD_SPACE}; | ||
| 7943 | |||
| 7944 | CHARSET_INFO my_charset_utf32_slovenian_uca_ci = { | ||
| 7945 | 164, | ||
| 7946 | 0, | ||
| 7947 | 0, /* number */ | ||
| 7948 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 7949 | "utf32", /* csname */ | ||
| 7950 | "utf32_slovenian_ci", /* m_coll_name */ | ||
| 7951 | "", /* comment */ | ||
| 7952 | slovenian, /* tailoring */ | ||
| 7953 | nullptr, /* coll_param */ | ||
| 7954 | nullptr, /* ctype */ | ||
| 7955 | nullptr, /* to_lower */ | ||
| 7956 | nullptr, /* to_upper */ | ||
| 7957 | nullptr, /* sort_order */ | ||
| 7958 | nullptr, /* uca */ | ||
| 7959 | nullptr, /* tab_to_uni */ | ||
| 7960 | nullptr, /* tab_from_uni */ | ||
| 7961 | &my_unicase_default, /* caseinfo */ | ||
| 7962 | nullptr, /* state_map */ | ||
| 7963 | nullptr, /* ident_map */ | ||
| 7964 | 8, /* strxfrm_multiply */ | ||
| 7965 | 1, /* caseup_multiply */ | ||
| 7966 | 1, /* casedn_multiply */ | ||
| 7967 | 4, /* mbminlen */ | ||
| 7968 | 4, /* mbmaxlen */ | ||
| 7969 | 1, /* mbmaxlenlen */ | ||
| 7970 | 9, /* min_sort_char */ | ||
| 7971 | 0xFFFF, /* max_sort_char */ | ||
| 7972 | ' ', /* pad char */ | ||
| 7973 | false, /* escape_with_backslash_is_dangerous */ | ||
| 7974 | 1, /* levels_for_compare */ | ||
| 7975 | &my_charset_utf32_handler, | ||
| 7976 | &my_collation_utf32_uca_handler, | ||
| 7977 | PAD_SPACE}; | ||
| 7978 | |||
| 7979 | CHARSET_INFO my_charset_utf32_polish_uca_ci = { | ||
| 7980 | 165, | ||
| 7981 | 0, | ||
| 7982 | 0, /* number */ | ||
| 7983 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 7984 | "utf32", /* csname */ | ||
| 7985 | "utf32_polish_ci", /* m_coll_name */ | ||
| 7986 | "", /* comment */ | ||
| 7987 | polish, /* tailoring */ | ||
| 7988 | nullptr, /* coll_param */ | ||
| 7989 | nullptr, /* ctype */ | ||
| 7990 | nullptr, /* to_lower */ | ||
| 7991 | nullptr, /* to_upper */ | ||
| 7992 | nullptr, /* sort_order */ | ||
| 7993 | nullptr, /* uca */ | ||
| 7994 | nullptr, /* tab_to_uni */ | ||
| 7995 | nullptr, /* tab_from_uni */ | ||
| 7996 | &my_unicase_default, /* caseinfo */ | ||
| 7997 | nullptr, /* state_map */ | ||
| 7998 | nullptr, /* ident_map */ | ||
| 7999 | 8, /* strxfrm_multiply */ | ||
| 8000 | 1, /* caseup_multiply */ | ||
| 8001 | 1, /* casedn_multiply */ | ||
| 8002 | 4, /* mbminlen */ | ||
| 8003 | 4, /* mbmaxlen */ | ||
| 8004 | 1, /* mbmaxlenlen */ | ||
| 8005 | 9, /* min_sort_char */ | ||
| 8006 | 0xFFFF, /* max_sort_char */ | ||
| 8007 | ' ', /* pad char */ | ||
| 8008 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8009 | 1, /* levels_for_compare */ | ||
| 8010 | &my_charset_utf32_handler, | ||
| 8011 | &my_collation_utf32_uca_handler, | ||
| 8012 | PAD_SPACE}; | ||
| 8013 | |||
| 8014 | CHARSET_INFO my_charset_utf32_estonian_uca_ci = { | ||
| 8015 | 166, | ||
| 8016 | 0, | ||
| 8017 | 0, /* number */ | ||
| 8018 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8019 | "utf32", /* csname */ | ||
| 8020 | "utf32_estonian_ci", /* m_coll_name */ | ||
| 8021 | "", /* comment */ | ||
| 8022 | estonian, /* tailoring */ | ||
| 8023 | nullptr, /* coll_param */ | ||
| 8024 | nullptr, /* ctype */ | ||
| 8025 | nullptr, /* to_lower */ | ||
| 8026 | nullptr, /* to_upper */ | ||
| 8027 | nullptr, /* sort_order */ | ||
| 8028 | nullptr, /* uca */ | ||
| 8029 | nullptr, /* tab_to_uni */ | ||
| 8030 | nullptr, /* tab_from_uni */ | ||
| 8031 | &my_unicase_default, /* caseinfo */ | ||
| 8032 | nullptr, /* state_map */ | ||
| 8033 | nullptr, /* ident_map */ | ||
| 8034 | 8, /* strxfrm_multiply */ | ||
| 8035 | 1, /* caseup_multiply */ | ||
| 8036 | 1, /* casedn_multiply */ | ||
| 8037 | 4, /* mbminlen */ | ||
| 8038 | 4, /* mbmaxlen */ | ||
| 8039 | 1, /* mbmaxlenlen */ | ||
| 8040 | 9, /* min_sort_char */ | ||
| 8041 | 0xFFFF, /* max_sort_char */ | ||
| 8042 | ' ', /* pad char */ | ||
| 8043 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8044 | 1, /* levels_for_compare */ | ||
| 8045 | &my_charset_utf32_handler, | ||
| 8046 | &my_collation_utf32_uca_handler, | ||
| 8047 | PAD_SPACE}; | ||
| 8048 | |||
| 8049 | CHARSET_INFO my_charset_utf32_spanish_uca_ci = { | ||
| 8050 | 167, | ||
| 8051 | 0, | ||
| 8052 | 0, /* number */ | ||
| 8053 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8054 | "utf32", /* csname */ | ||
| 8055 | "utf32_spanish_ci", /* m_coll_name */ | ||
| 8056 | "", /* comment */ | ||
| 8057 | spanish, /* tailoring */ | ||
| 8058 | nullptr, /* coll_param */ | ||
| 8059 | nullptr, /* ctype */ | ||
| 8060 | nullptr, /* to_lower */ | ||
| 8061 | nullptr, /* to_upper */ | ||
| 8062 | nullptr, /* sort_order */ | ||
| 8063 | nullptr, /* uca */ | ||
| 8064 | nullptr, /* tab_to_uni */ | ||
| 8065 | nullptr, /* tab_from_uni */ | ||
| 8066 | &my_unicase_default, /* caseinfo */ | ||
| 8067 | nullptr, /* state_map */ | ||
| 8068 | nullptr, /* ident_map */ | ||
| 8069 | 8, /* strxfrm_multiply */ | ||
| 8070 | 1, /* caseup_multiply */ | ||
| 8071 | 1, /* casedn_multiply */ | ||
| 8072 | 4, /* mbminlen */ | ||
| 8073 | 4, /* mbmaxlen */ | ||
| 8074 | 1, /* mbmaxlenlen */ | ||
| 8075 | 9, /* min_sort_char */ | ||
| 8076 | 0xFFFF, /* max_sort_char */ | ||
| 8077 | ' ', /* pad char */ | ||
| 8078 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8079 | 1, /* levels_for_compare */ | ||
| 8080 | &my_charset_utf32_handler, | ||
| 8081 | &my_collation_utf32_uca_handler, | ||
| 8082 | PAD_SPACE}; | ||
| 8083 | |||
| 8084 | CHARSET_INFO my_charset_utf32_swedish_uca_ci = { | ||
| 8085 | 168, | ||
| 8086 | 0, | ||
| 8087 | 0, /* number */ | ||
| 8088 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8089 | "utf32", /* csname */ | ||
| 8090 | "utf32_swedish_ci", /* m_coll_name */ | ||
| 8091 | "", /* comment */ | ||
| 8092 | swedish, /* tailoring */ | ||
| 8093 | nullptr, /* coll_param */ | ||
| 8094 | nullptr, /* ctype */ | ||
| 8095 | nullptr, /* to_lower */ | ||
| 8096 | nullptr, /* to_upper */ | ||
| 8097 | nullptr, /* sort_order */ | ||
| 8098 | nullptr, /* uca */ | ||
| 8099 | nullptr, /* tab_to_uni */ | ||
| 8100 | nullptr, /* tab_from_uni */ | ||
| 8101 | &my_unicase_default, /* caseinfo */ | ||
| 8102 | nullptr, /* state_map */ | ||
| 8103 | nullptr, /* ident_map */ | ||
| 8104 | 8, /* strxfrm_multiply */ | ||
| 8105 | 1, /* caseup_multiply */ | ||
| 8106 | 1, /* casedn_multiply */ | ||
| 8107 | 4, /* mbminlen */ | ||
| 8108 | 4, /* mbmaxlen */ | ||
| 8109 | 1, /* mbmaxlenlen */ | ||
| 8110 | 9, /* min_sort_char */ | ||
| 8111 | 0xFFFF, /* max_sort_char */ | ||
| 8112 | ' ', /* pad char */ | ||
| 8113 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8114 | 1, /* levels_for_compare */ | ||
| 8115 | &my_charset_utf32_handler, | ||
| 8116 | &my_collation_utf32_uca_handler, | ||
| 8117 | PAD_SPACE}; | ||
| 8118 | |||
| 8119 | CHARSET_INFO my_charset_utf32_turkish_uca_ci = { | ||
| 8120 | 169, | ||
| 8121 | 0, | ||
| 8122 | 0, /* number */ | ||
| 8123 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8124 | "utf32", /* csname */ | ||
| 8125 | "utf32_turkish_ci", /* m_coll_name */ | ||
| 8126 | "", /* comment */ | ||
| 8127 | turkish, /* tailoring */ | ||
| 8128 | nullptr, /* coll_param */ | ||
| 8129 | nullptr, /* ctype */ | ||
| 8130 | nullptr, /* to_lower */ | ||
| 8131 | nullptr, /* to_upper */ | ||
| 8132 | nullptr, /* sort_order */ | ||
| 8133 | nullptr, /* uca */ | ||
| 8134 | nullptr, /* tab_to_uni */ | ||
| 8135 | nullptr, /* tab_from_uni */ | ||
| 8136 | &my_unicase_turkish, /* caseinfo */ | ||
| 8137 | nullptr, /* state_map */ | ||
| 8138 | nullptr, /* ident_map */ | ||
| 8139 | 8, /* strxfrm_multiply */ | ||
| 8140 | 1, /* caseup_multiply */ | ||
| 8141 | 1, /* casedn_multiply */ | ||
| 8142 | 4, /* mbminlen */ | ||
| 8143 | 4, /* mbmaxlen */ | ||
| 8144 | 1, /* mbmaxlenlen */ | ||
| 8145 | 9, /* min_sort_char */ | ||
| 8146 | 0xFFFF, /* max_sort_char */ | ||
| 8147 | ' ', /* pad char */ | ||
| 8148 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8149 | 1, /* levels_for_compare */ | ||
| 8150 | &my_charset_utf32_handler, | ||
| 8151 | &my_collation_utf32_uca_handler, | ||
| 8152 | PAD_SPACE}; | ||
| 8153 | |||
| 8154 | CHARSET_INFO my_charset_utf32_czech_uca_ci = { | ||
| 8155 | 170, | ||
| 8156 | 0, | ||
| 8157 | 0, /* number */ | ||
| 8158 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8159 | "utf32", /* csname */ | ||
| 8160 | "utf32_czech_ci", /* m_coll_name */ | ||
| 8161 | "", /* comment */ | ||
| 8162 | czech, /* tailoring */ | ||
| 8163 | nullptr, /* coll_param */ | ||
| 8164 | nullptr, /* ctype */ | ||
| 8165 | nullptr, /* to_lower */ | ||
| 8166 | nullptr, /* to_upper */ | ||
| 8167 | nullptr, /* sort_order */ | ||
| 8168 | nullptr, /* uca */ | ||
| 8169 | nullptr, /* tab_to_uni */ | ||
| 8170 | nullptr, /* tab_from_uni */ | ||
| 8171 | &my_unicase_default, /* caseinfo */ | ||
| 8172 | nullptr, /* state_map */ | ||
| 8173 | nullptr, /* ident_map */ | ||
| 8174 | 8, /* strxfrm_multiply */ | ||
| 8175 | 1, /* caseup_multiply */ | ||
| 8176 | 1, /* casedn_multiply */ | ||
| 8177 | 4, /* mbminlen */ | ||
| 8178 | 4, /* mbmaxlen */ | ||
| 8179 | 1, /* mbmaxlenlen */ | ||
| 8180 | 9, /* min_sort_char */ | ||
| 8181 | 0xFFFF, /* max_sort_char */ | ||
| 8182 | ' ', /* pad char */ | ||
| 8183 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8184 | 1, /* levels_for_compare */ | ||
| 8185 | &my_charset_utf32_handler, | ||
| 8186 | &my_collation_utf32_uca_handler, | ||
| 8187 | PAD_SPACE}; | ||
| 8188 | |||
| 8189 | CHARSET_INFO my_charset_utf32_danish_uca_ci = { | ||
| 8190 | 171, | ||
| 8191 | 0, | ||
| 8192 | 0, /* number */ | ||
| 8193 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8194 | "utf32", /* csname */ | ||
| 8195 | "utf32_danish_ci", /* m_coll_name */ | ||
| 8196 | "", /* comment */ | ||
| 8197 | danish, /* tailoring */ | ||
| 8198 | nullptr, /* coll_param */ | ||
| 8199 | nullptr, /* ctype */ | ||
| 8200 | nullptr, /* to_lower */ | ||
| 8201 | nullptr, /* to_upper */ | ||
| 8202 | nullptr, /* sort_order */ | ||
| 8203 | nullptr, /* uca */ | ||
| 8204 | nullptr, /* tab_to_uni */ | ||
| 8205 | nullptr, /* tab_from_uni */ | ||
| 8206 | &my_unicase_default, /* caseinfo */ | ||
| 8207 | nullptr, /* state_map */ | ||
| 8208 | nullptr, /* ident_map */ | ||
| 8209 | 8, /* strxfrm_multiply */ | ||
| 8210 | 1, /* caseup_multiply */ | ||
| 8211 | 1, /* casedn_multiply */ | ||
| 8212 | 4, /* mbminlen */ | ||
| 8213 | 4, /* mbmaxlen */ | ||
| 8214 | 1, /* mbmaxlenlen */ | ||
| 8215 | 9, /* min_sort_char */ | ||
| 8216 | 0xFFFF, /* max_sort_char */ | ||
| 8217 | ' ', /* pad char */ | ||
| 8218 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8219 | 1, /* levels_for_compare */ | ||
| 8220 | &my_charset_utf32_handler, | ||
| 8221 | &my_collation_utf32_uca_handler, | ||
| 8222 | PAD_SPACE}; | ||
| 8223 | |||
| 8224 | CHARSET_INFO my_charset_utf32_lithuanian_uca_ci = { | ||
| 8225 | 172, | ||
| 8226 | 0, | ||
| 8227 | 0, /* number */ | ||
| 8228 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8229 | "utf32", /* csname */ | ||
| 8230 | "utf32_lithuanian_ci", /* m_coll_name */ | ||
| 8231 | "", /* comment */ | ||
| 8232 | lithuanian, /* tailoring */ | ||
| 8233 | nullptr, /* coll_param */ | ||
| 8234 | nullptr, /* ctype */ | ||
| 8235 | nullptr, /* to_lower */ | ||
| 8236 | nullptr, /* to_upper */ | ||
| 8237 | nullptr, /* sort_order */ | ||
| 8238 | nullptr, /* uca */ | ||
| 8239 | nullptr, /* tab_to_uni */ | ||
| 8240 | nullptr, /* tab_from_uni */ | ||
| 8241 | &my_unicase_default, /* caseinfo */ | ||
| 8242 | nullptr, /* state_map */ | ||
| 8243 | nullptr, /* ident_map */ | ||
| 8244 | 8, /* strxfrm_multiply */ | ||
| 8245 | 1, /* caseup_multiply */ | ||
| 8246 | 1, /* casedn_multiply */ | ||
| 8247 | 4, /* mbminlen */ | ||
| 8248 | 4, /* mbmaxlen */ | ||
| 8249 | 1, /* mbmaxlenlen */ | ||
| 8250 | 9, /* min_sort_char */ | ||
| 8251 | 0xFFFF, /* max_sort_char */ | ||
| 8252 | ' ', /* pad char */ | ||
| 8253 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8254 | 1, /* levels_for_compare */ | ||
| 8255 | &my_charset_utf32_handler, | ||
| 8256 | &my_collation_utf32_uca_handler, | ||
| 8257 | PAD_SPACE}; | ||
| 8258 | |||
| 8259 | CHARSET_INFO my_charset_utf32_slovak_uca_ci = { | ||
| 8260 | 173, | ||
| 8261 | 0, | ||
| 8262 | 0, /* number */ | ||
| 8263 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8264 | "utf32", /* csname */ | ||
| 8265 | "utf32_slovak_ci", /* m_coll_name */ | ||
| 8266 | "", /* comment */ | ||
| 8267 | slovak, /* tailoring */ | ||
| 8268 | nullptr, /* coll_param */ | ||
| 8269 | nullptr, /* ctype */ | ||
| 8270 | nullptr, /* to_lower */ | ||
| 8271 | nullptr, /* to_upper */ | ||
| 8272 | nullptr, /* sort_order */ | ||
| 8273 | nullptr, /* uca */ | ||
| 8274 | nullptr, /* tab_to_uni */ | ||
| 8275 | nullptr, /* tab_from_uni */ | ||
| 8276 | &my_unicase_default, /* caseinfo */ | ||
| 8277 | nullptr, /* state_map */ | ||
| 8278 | nullptr, /* ident_map */ | ||
| 8279 | 8, /* strxfrm_multiply */ | ||
| 8280 | 1, /* caseup_multiply */ | ||
| 8281 | 1, /* casedn_multiply */ | ||
| 8282 | 4, /* mbminlen */ | ||
| 8283 | 4, /* mbmaxlen */ | ||
| 8284 | 1, /* mbmaxlenlen */ | ||
| 8285 | 9, /* min_sort_char */ | ||
| 8286 | 0xFFFF, /* max_sort_char */ | ||
| 8287 | ' ', /* pad char */ | ||
| 8288 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8289 | 1, /* levels_for_compare */ | ||
| 8290 | &my_charset_utf32_handler, | ||
| 8291 | &my_collation_utf32_uca_handler, | ||
| 8292 | PAD_SPACE}; | ||
| 8293 | |||
| 8294 | CHARSET_INFO my_charset_utf32_spanish2_uca_ci = { | ||
| 8295 | 174, | ||
| 8296 | 0, | ||
| 8297 | 0, /* number */ | ||
| 8298 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8299 | "utf32", /* csname */ | ||
| 8300 | "utf32_spanish2_ci", /* m_coll_name */ | ||
| 8301 | "", /* comment */ | ||
| 8302 | spanish2, /* tailoring */ | ||
| 8303 | nullptr, /* coll_param */ | ||
| 8304 | nullptr, /* ctype */ | ||
| 8305 | nullptr, /* to_lower */ | ||
| 8306 | nullptr, /* to_upper */ | ||
| 8307 | nullptr, /* sort_order */ | ||
| 8308 | nullptr, /* uca */ | ||
| 8309 | nullptr, /* tab_to_uni */ | ||
| 8310 | nullptr, /* tab_from_uni */ | ||
| 8311 | &my_unicase_default, /* caseinfo */ | ||
| 8312 | nullptr, /* state_map */ | ||
| 8313 | nullptr, /* ident_map */ | ||
| 8314 | 8, /* strxfrm_multiply */ | ||
| 8315 | 1, /* caseup_multiply */ | ||
| 8316 | 1, /* casedn_multiply */ | ||
| 8317 | 4, /* mbminlen */ | ||
| 8318 | 4, /* mbmaxlen */ | ||
| 8319 | 1, /* mbmaxlenlen */ | ||
| 8320 | 9, /* min_sort_char */ | ||
| 8321 | 0xFFFF, /* max_sort_char */ | ||
| 8322 | ' ', /* pad char */ | ||
| 8323 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8324 | 1, /* levels_for_compare */ | ||
| 8325 | &my_charset_utf32_handler, | ||
| 8326 | &my_collation_utf32_uca_handler, | ||
| 8327 | PAD_SPACE}; | ||
| 8328 | |||
| 8329 | CHARSET_INFO my_charset_utf32_roman_uca_ci = { | ||
| 8330 | 175, | ||
| 8331 | 0, | ||
| 8332 | 0, /* number */ | ||
| 8333 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8334 | "utf32", /* csname */ | ||
| 8335 | "utf32_roman_ci", /* m_coll_name */ | ||
| 8336 | "", /* comment */ | ||
| 8337 | roman, /* tailoring */ | ||
| 8338 | nullptr, /* coll_param */ | ||
| 8339 | nullptr, /* ctype */ | ||
| 8340 | nullptr, /* to_lower */ | ||
| 8341 | nullptr, /* to_upper */ | ||
| 8342 | nullptr, /* sort_order */ | ||
| 8343 | nullptr, /* uca */ | ||
| 8344 | nullptr, /* tab_to_uni */ | ||
| 8345 | nullptr, /* tab_from_uni */ | ||
| 8346 | &my_unicase_default, /* caseinfo */ | ||
| 8347 | nullptr, /* state_map */ | ||
| 8348 | nullptr, /* ident_map */ | ||
| 8349 | 8, /* strxfrm_multiply */ | ||
| 8350 | 1, /* caseup_multiply */ | ||
| 8351 | 1, /* casedn_multiply */ | ||
| 8352 | 4, /* mbminlen */ | ||
| 8353 | 4, /* mbmaxlen */ | ||
| 8354 | 1, /* mbmaxlenlen */ | ||
| 8355 | 9, /* min_sort_char */ | ||
| 8356 | 0xFFFF, /* max_sort_char */ | ||
| 8357 | ' ', /* pad char */ | ||
| 8358 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8359 | 1, /* levels_for_compare */ | ||
| 8360 | &my_charset_utf32_handler, | ||
| 8361 | &my_collation_utf32_uca_handler, | ||
| 8362 | PAD_SPACE}; | ||
| 8363 | |||
| 8364 | CHARSET_INFO my_charset_utf32_persian_uca_ci = { | ||
| 8365 | 176, | ||
| 8366 | 0, | ||
| 8367 | 0, /* number */ | ||
| 8368 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8369 | "utf32", /* csname */ | ||
| 8370 | "utf32_persian_ci", /* m_coll_name */ | ||
| 8371 | "", /* comment */ | ||
| 8372 | persian, /* tailoring */ | ||
| 8373 | nullptr, /* coll_param */ | ||
| 8374 | nullptr, /* ctype */ | ||
| 8375 | nullptr, /* to_lower */ | ||
| 8376 | nullptr, /* to_upper */ | ||
| 8377 | nullptr, /* sort_order */ | ||
| 8378 | nullptr, /* uca */ | ||
| 8379 | nullptr, /* tab_to_uni */ | ||
| 8380 | nullptr, /* tab_from_uni */ | ||
| 8381 | &my_unicase_default, /* caseinfo */ | ||
| 8382 | nullptr, /* state_map */ | ||
| 8383 | nullptr, /* ident_map */ | ||
| 8384 | 8, /* strxfrm_multiply */ | ||
| 8385 | 1, /* caseup_multiply */ | ||
| 8386 | 1, /* casedn_multiply */ | ||
| 8387 | 4, /* mbminlen */ | ||
| 8388 | 4, /* mbmaxlen */ | ||
| 8389 | 1, /* mbmaxlenlen */ | ||
| 8390 | 9, /* min_sort_char */ | ||
| 8391 | 0xFFFF, /* max_sort_char */ | ||
| 8392 | ' ', /* pad char */ | ||
| 8393 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8394 | 1, /* levels_for_compare */ | ||
| 8395 | &my_charset_utf32_handler, | ||
| 8396 | &my_collation_utf32_uca_handler, | ||
| 8397 | PAD_SPACE}; | ||
| 8398 | |||
| 8399 | CHARSET_INFO my_charset_utf32_esperanto_uca_ci = { | ||
| 8400 | 177, | ||
| 8401 | 0, | ||
| 8402 | 0, /* number */ | ||
| 8403 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8404 | "utf32", /* csname */ | ||
| 8405 | "utf32_esperanto_ci", /* m_coll_name */ | ||
| 8406 | "", /* comment */ | ||
| 8407 | esperanto, /* tailoring */ | ||
| 8408 | nullptr, /* coll_param */ | ||
| 8409 | nullptr, /* ctype */ | ||
| 8410 | nullptr, /* to_lower */ | ||
| 8411 | nullptr, /* to_upper */ | ||
| 8412 | nullptr, /* sort_order */ | ||
| 8413 | nullptr, /* uca */ | ||
| 8414 | nullptr, /* tab_to_uni */ | ||
| 8415 | nullptr, /* tab_from_uni */ | ||
| 8416 | &my_unicase_default, /* caseinfo */ | ||
| 8417 | nullptr, /* state_map */ | ||
| 8418 | nullptr, /* ident_map */ | ||
| 8419 | 8, /* strxfrm_multiply */ | ||
| 8420 | 1, /* caseup_multiply */ | ||
| 8421 | 1, /* casedn_multiply */ | ||
| 8422 | 4, /* mbminlen */ | ||
| 8423 | 4, /* mbmaxlen */ | ||
| 8424 | 1, /* mbmaxlenlen */ | ||
| 8425 | 9, /* min_sort_char */ | ||
| 8426 | 0xFFFF, /* max_sort_char */ | ||
| 8427 | ' ', /* pad char */ | ||
| 8428 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8429 | 1, /* levels_for_compare */ | ||
| 8430 | &my_charset_utf32_handler, | ||
| 8431 | &my_collation_utf32_uca_handler, | ||
| 8432 | PAD_SPACE}; | ||
| 8433 | |||
| 8434 | CHARSET_INFO my_charset_utf32_hungarian_uca_ci = { | ||
| 8435 | 178, | ||
| 8436 | 0, | ||
| 8437 | 0, /* number */ | ||
| 8438 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8439 | "utf32", /* csname */ | ||
| 8440 | "utf32_hungarian_ci", /* m_coll_name */ | ||
| 8441 | "", /* comment */ | ||
| 8442 | hungarian, /* tailoring */ | ||
| 8443 | nullptr, /* coll_param */ | ||
| 8444 | nullptr, /* ctype */ | ||
| 8445 | nullptr, /* to_lower */ | ||
| 8446 | nullptr, /* to_upper */ | ||
| 8447 | nullptr, /* sort_order */ | ||
| 8448 | nullptr, /* uca */ | ||
| 8449 | nullptr, /* tab_to_uni */ | ||
| 8450 | nullptr, /* tab_from_uni */ | ||
| 8451 | &my_unicase_default, /* caseinfo */ | ||
| 8452 | nullptr, /* state_map */ | ||
| 8453 | nullptr, /* ident_map */ | ||
| 8454 | 8, /* strxfrm_multiply */ | ||
| 8455 | 1, /* caseup_multiply */ | ||
| 8456 | 1, /* casedn_multiply */ | ||
| 8457 | 4, /* mbminlen */ | ||
| 8458 | 4, /* mbmaxlen */ | ||
| 8459 | 1, /* mbmaxlenlen */ | ||
| 8460 | 9, /* min_sort_char */ | ||
| 8461 | 0xFFFF, /* max_sort_char */ | ||
| 8462 | ' ', /* pad char */ | ||
| 8463 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8464 | 1, /* levels_for_compare */ | ||
| 8465 | &my_charset_utf32_handler, | ||
| 8466 | &my_collation_utf32_uca_handler, | ||
| 8467 | PAD_SPACE}; | ||
| 8468 | |||
| 8469 | CHARSET_INFO my_charset_utf32_sinhala_uca_ci = { | ||
| 8470 | 179, | ||
| 8471 | 0, | ||
| 8472 | 0, /* number */ | ||
| 8473 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8474 | "utf32", /* csname */ | ||
| 8475 | "utf32_sinhala_ci", /* m_coll_name */ | ||
| 8476 | "", /* comment */ | ||
| 8477 | sinhala, /* tailoring */ | ||
| 8478 | nullptr, /* coll_param */ | ||
| 8479 | nullptr, /* ctype */ | ||
| 8480 | nullptr, /* to_lower */ | ||
| 8481 | nullptr, /* to_upper */ | ||
| 8482 | nullptr, /* sort_order */ | ||
| 8483 | nullptr, /* uca */ | ||
| 8484 | nullptr, /* tab_to_uni */ | ||
| 8485 | nullptr, /* tab_from_uni */ | ||
| 8486 | &my_unicase_default, /* caseinfo */ | ||
| 8487 | nullptr, /* state_map */ | ||
| 8488 | nullptr, /* ident_map */ | ||
| 8489 | 8, /* strxfrm_multiply */ | ||
| 8490 | 1, /* caseup_multiply */ | ||
| 8491 | 1, /* casedn_multiply */ | ||
| 8492 | 4, /* mbminlen */ | ||
| 8493 | 4, /* mbmaxlen */ | ||
| 8494 | 1, /* mbmaxlenlen */ | ||
| 8495 | 9, /* min_sort_char */ | ||
| 8496 | 0xFFFF, /* max_sort_char */ | ||
| 8497 | ' ', /* pad char */ | ||
| 8498 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8499 | 1, /* levels_for_compare */ | ||
| 8500 | &my_charset_utf32_handler, | ||
| 8501 | &my_collation_utf32_uca_handler, | ||
| 8502 | PAD_SPACE}; | ||
| 8503 | |||
| 8504 | CHARSET_INFO my_charset_utf32_german2_uca_ci = { | ||
| 8505 | 180, | ||
| 8506 | 0, | ||
| 8507 | 0, /* number */ | ||
| 8508 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8509 | "utf32", /* csname */ | ||
| 8510 | "utf32_german2_ci", /* m_coll_name */ | ||
| 8511 | "", /* comment */ | ||
| 8512 | german2, /* tailoring */ | ||
| 8513 | nullptr, /* coll_param */ | ||
| 8514 | nullptr, /* ctype */ | ||
| 8515 | nullptr, /* to_lower */ | ||
| 8516 | nullptr, /* to_upper */ | ||
| 8517 | nullptr, /* sort_order */ | ||
| 8518 | nullptr, /* uca */ | ||
| 8519 | nullptr, /* tab_to_uni */ | ||
| 8520 | nullptr, /* tab_from_uni */ | ||
| 8521 | &my_unicase_default, /* caseinfo */ | ||
| 8522 | nullptr, /* state_map */ | ||
| 8523 | nullptr, /* ident_map */ | ||
| 8524 | 8, /* strxfrm_multiply */ | ||
| 8525 | 1, /* caseup_multiply */ | ||
| 8526 | 1, /* casedn_multiply */ | ||
| 8527 | 4, /* mbminlen */ | ||
| 8528 | 4, /* mbmaxlen */ | ||
| 8529 | 1, /* mbmaxlenlen */ | ||
| 8530 | 9, /* min_sort_char */ | ||
| 8531 | 0xFFFF, /* max_sort_char */ | ||
| 8532 | ' ', /* pad char */ | ||
| 8533 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8534 | 1, /* levels_for_compare */ | ||
| 8535 | &my_charset_utf32_handler, | ||
| 8536 | &my_collation_utf32_uca_handler, | ||
| 8537 | PAD_SPACE}; | ||
| 8538 | |||
| 8539 | CHARSET_INFO my_charset_utf32_croatian_uca_ci = { | ||
| 8540 | 181, | ||
| 8541 | 0, | ||
| 8542 | 0, /* number */ | ||
| 8543 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8544 | "utf32", /* csname */ | ||
| 8545 | "utf32_croatian_ci", /* m_coll_name */ | ||
| 8546 | "", /* comment */ | ||
| 8547 | croatian, /* tailoring */ | ||
| 8548 | nullptr, /* coll_param */ | ||
| 8549 | nullptr, /* ctype */ | ||
| 8550 | nullptr, /* to_lower */ | ||
| 8551 | nullptr, /* to_upper */ | ||
| 8552 | nullptr, /* sort_order */ | ||
| 8553 | nullptr, /* uca */ | ||
| 8554 | nullptr, /* tab_to_uni */ | ||
| 8555 | nullptr, /* tab_from_uni */ | ||
| 8556 | &my_unicase_default, /* caseinfo */ | ||
| 8557 | nullptr, /* state_map */ | ||
| 8558 | nullptr, /* ident_map */ | ||
| 8559 | 8, /* strxfrm_multiply */ | ||
| 8560 | 1, /* caseup_multiply */ | ||
| 8561 | 1, /* casedn_multiply */ | ||
| 8562 | 4, /* mbminlen */ | ||
| 8563 | 4, /* mbmaxlen */ | ||
| 8564 | 1, /* mbmaxlenlen */ | ||
| 8565 | 9, /* min_sort_char */ | ||
| 8566 | 0xFFFF, /* max_sort_char */ | ||
| 8567 | ' ', /* pad char */ | ||
| 8568 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8569 | 1, /* levels_for_compare */ | ||
| 8570 | &my_charset_utf32_handler, | ||
| 8571 | &my_collation_utf32_uca_handler, | ||
| 8572 | PAD_SPACE}; | ||
| 8573 | |||
| 8574 | CHARSET_INFO my_charset_utf32_unicode_520_ci = { | ||
| 8575 | 182, | ||
| 8576 | 0, | ||
| 8577 | 0, /* number */ | ||
| 8578 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8579 | "utf32", /* csname */ | ||
| 8580 | "utf32_unicode_520_ci", /* m_coll_name */ | ||
| 8581 | "", /* comment */ | ||
| 8582 | "", /* tailoring */ | ||
| 8583 | nullptr, /* coll_param */ | ||
| 8584 | nullptr, /* ctype */ | ||
| 8585 | nullptr, /* to_lower */ | ||
| 8586 | nullptr, /* to_upper */ | ||
| 8587 | nullptr, /* sort_order */ | ||
| 8588 | &my_uca_v520, /* uca */ | ||
| 8589 | nullptr, /* tab_to_uni */ | ||
| 8590 | nullptr, /* tab_from_uni */ | ||
| 8591 | &my_unicase_unicode520, /* caseinfo */ | ||
| 8592 | nullptr, /* state_map */ | ||
| 8593 | nullptr, /* ident_map */ | ||
| 8594 | 8, /* strxfrm_multiply */ | ||
| 8595 | 1, /* caseup_multiply */ | ||
| 8596 | 1, /* casedn_multiply */ | ||
| 8597 | 4, /* mbminlen */ | ||
| 8598 | 4, /* mbmaxlen */ | ||
| 8599 | 1, /* mbmaxlenlen */ | ||
| 8600 | 9, /* min_sort_char */ | ||
| 8601 | 0x10FFFF, /* max_sort_char */ | ||
| 8602 | ' ', /* pad char */ | ||
| 8603 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8604 | 1, /* levels_for_compare */ | ||
| 8605 | &my_charset_utf32_handler, | ||
| 8606 | &my_collation_utf32_uca_handler, | ||
| 8607 | PAD_SPACE}; | ||
| 8608 | |||
| 8609 | CHARSET_INFO my_charset_utf32_vietnamese_ci = { | ||
| 8610 | 183, | ||
| 8611 | 0, | ||
| 8612 | 0, /* number */ | ||
| 8613 | MY_CS_UTF32_UCA_FLAGS, /* state */ | ||
| 8614 | "utf32", /* csname */ | ||
| 8615 | "utf32_vietnamese_ci", /* m_coll_name */ | ||
| 8616 | "", /* comment */ | ||
| 8617 | vietnamese, /* tailoring */ | ||
| 8618 | nullptr, /* coll_param */ | ||
| 8619 | nullptr, /* ctype */ | ||
| 8620 | nullptr, /* to_lower */ | ||
| 8621 | nullptr, /* to_upper */ | ||
| 8622 | nullptr, /* sort_order */ | ||
| 8623 | nullptr, /* uca */ | ||
| 8624 | nullptr, /* tab_to_uni */ | ||
| 8625 | nullptr, /* tab_from_uni */ | ||
| 8626 | &my_unicase_default, /* caseinfo */ | ||
| 8627 | nullptr, /* state_map */ | ||
| 8628 | nullptr, /* ident_map */ | ||
| 8629 | 8, /* strxfrm_multiply */ | ||
| 8630 | 1, /* caseup_multiply */ | ||
| 8631 | 1, /* casedn_multiply */ | ||
| 8632 | 4, /* mbminlen */ | ||
| 8633 | 4, /* mbmaxlen */ | ||
| 8634 | 1, /* mbmaxlenlen */ | ||
| 8635 | 9, /* min_sort_char */ | ||
| 8636 | 0xFFFF, /* max_sort_char */ | ||
| 8637 | ' ', /* pad char */ | ||
| 8638 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8639 | 1, /* levels_for_compare */ | ||
| 8640 | &my_charset_utf32_handler, | ||
| 8641 | &my_collation_utf32_uca_handler, | ||
| 8642 | PAD_SPACE}; | ||
| 8643 | |||
| 8644 | MY_COLLATION_HANDLER my_collation_utf16_uca_handler = { | ||
| 8645 | my_coll_init_uca, /* init */ | ||
| 8646 | my_coll_uninit_uca, | ||
| 8647 | my_strnncoll_any_uca, | ||
| 8648 | my_strnncollsp_any_uca, | ||
| 8649 | my_strnxfrm_any_uca, | ||
| 8650 | my_strnxfrmlen_simple, | ||
| 8651 | my_like_range_generic, | ||
| 8652 | my_wildcmp_uca, | ||
| 8653 | nullptr, | ||
| 8654 | my_instr_mb, | ||
| 8655 | my_hash_sort_any_uca, | ||
| 8656 | my_propagate_complex}; | ||
| 8657 | |||
| 8658 | extern MY_CHARSET_HANDLER my_charset_utf16_handler; | ||
| 8659 | |||
| 8660 | #define MY_CS_UTF16_UCA_FLAGS \ | ||
| 8661 | (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII) | ||
| 8662 | |||
| 8663 | CHARSET_INFO my_charset_utf16_unicode_ci = { | ||
| 8664 | 101, | ||
| 8665 | 0, | ||
| 8666 | 0, /* number */ | ||
| 8667 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8668 | "utf16", /* csname */ | ||
| 8669 | "utf16_unicode_ci", /* m_coll_name */ | ||
| 8670 | "", /* comment */ | ||
| 8671 | "", /* tailoring */ | ||
| 8672 | nullptr, /* coll_param */ | ||
| 8673 | nullptr, /* ctype */ | ||
| 8674 | nullptr, /* to_lower */ | ||
| 8675 | nullptr, /* to_upper */ | ||
| 8676 | nullptr, /* sort_order */ | ||
| 8677 | nullptr, /* uca */ | ||
| 8678 | nullptr, /* tab_to_uni */ | ||
| 8679 | nullptr, /* tab_from_uni */ | ||
| 8680 | &my_unicase_default, /* caseinfo */ | ||
| 8681 | nullptr, /* state_map */ | ||
| 8682 | nullptr, /* ident_map */ | ||
| 8683 | 8, /* strxfrm_multiply */ | ||
| 8684 | 1, /* caseup_multiply */ | ||
| 8685 | 1, /* casedn_multiply */ | ||
| 8686 | 2, /* mbminlen */ | ||
| 8687 | 4, /* mbmaxlen */ | ||
| 8688 | 1, /* mbmaxlenlen */ | ||
| 8689 | 9, /* min_sort_char */ | ||
| 8690 | 0xFFFF, /* max_sort_char */ | ||
| 8691 | ' ', /* pad char */ | ||
| 8692 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8693 | 1, /* levels_for_compare */ | ||
| 8694 | &my_charset_utf16_handler, | ||
| 8695 | &my_collation_utf16_uca_handler, | ||
| 8696 | PAD_SPACE}; | ||
| 8697 | |||
| 8698 | CHARSET_INFO my_charset_utf16_icelandic_uca_ci = { | ||
| 8699 | 102, | ||
| 8700 | 0, | ||
| 8701 | 0, /* number */ | ||
| 8702 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8703 | "utf16", /* csname */ | ||
| 8704 | "utf16_icelandic_ci", /* m_coll_name */ | ||
| 8705 | "", /* comment */ | ||
| 8706 | icelandic, /* tailoring */ | ||
| 8707 | nullptr, /* coll_param */ | ||
| 8708 | nullptr, /* ctype */ | ||
| 8709 | nullptr, /* to_lower */ | ||
| 8710 | nullptr, /* to_upper */ | ||
| 8711 | nullptr, /* sort_order */ | ||
| 8712 | nullptr, /* uca */ | ||
| 8713 | nullptr, /* tab_to_uni */ | ||
| 8714 | nullptr, /* tab_from_uni */ | ||
| 8715 | &my_unicase_default, /* caseinfo */ | ||
| 8716 | nullptr, /* state_map */ | ||
| 8717 | nullptr, /* ident_map */ | ||
| 8718 | 8, /* strxfrm_multiply */ | ||
| 8719 | 1, /* caseup_multiply */ | ||
| 8720 | 1, /* casedn_multiply */ | ||
| 8721 | 2, /* mbminlen */ | ||
| 8722 | 4, /* mbmaxlen */ | ||
| 8723 | 1, /* mbmaxlenlen */ | ||
| 8724 | 9, /* min_sort_char */ | ||
| 8725 | 0xFFFF, /* max_sort_char */ | ||
| 8726 | ' ', /* pad char */ | ||
| 8727 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8728 | 1, /* levels_for_compare */ | ||
| 8729 | &my_charset_utf16_handler, | ||
| 8730 | &my_collation_utf16_uca_handler, | ||
| 8731 | PAD_SPACE}; | ||
| 8732 | |||
| 8733 | CHARSET_INFO my_charset_utf16_latvian_uca_ci = { | ||
| 8734 | 103, | ||
| 8735 | 0, | ||
| 8736 | 0, /* number */ | ||
| 8737 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8738 | "utf16", /* csname */ | ||
| 8739 | "utf16_latvian_ci", /* m_coll_name */ | ||
| 8740 | "", /* comment */ | ||
| 8741 | latvian, /* tailoring */ | ||
| 8742 | nullptr, /* coll_param */ | ||
| 8743 | nullptr, /* ctype */ | ||
| 8744 | nullptr, /* to_lower */ | ||
| 8745 | nullptr, /* to_upper */ | ||
| 8746 | nullptr, /* sort_order */ | ||
| 8747 | nullptr, /* uca */ | ||
| 8748 | nullptr, /* tab_to_uni */ | ||
| 8749 | nullptr, /* tab_from_uni */ | ||
| 8750 | &my_unicase_default, /* caseinfo */ | ||
| 8751 | nullptr, /* state_map */ | ||
| 8752 | nullptr, /* ident_map */ | ||
| 8753 | 8, /* strxfrm_multiply */ | ||
| 8754 | 1, /* caseup_multiply */ | ||
| 8755 | 1, /* casedn_multiply */ | ||
| 8756 | 2, /* mbminlen */ | ||
| 8757 | 4, /* mbmaxlen */ | ||
| 8758 | 1, /* mbmaxlenlen */ | ||
| 8759 | 9, /* min_sort_char */ | ||
| 8760 | 0xFFFF, /* max_sort_char */ | ||
| 8761 | ' ', /* pad char */ | ||
| 8762 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8763 | 1, /* levels_for_compare */ | ||
| 8764 | &my_charset_utf16_handler, | ||
| 8765 | &my_collation_utf16_uca_handler, | ||
| 8766 | PAD_SPACE}; | ||
| 8767 | |||
| 8768 | CHARSET_INFO my_charset_utf16_romanian_uca_ci = { /* utf16_romanian_ci: utf16 UCA collation using the romanian tailoring */ | ||
| 8769 | 104, /* number (presumably the collation ID -- confirm) */ | ||
| 8770 | 0, /* presumably primary_number -- confirm */ | ||
| 8771 | 0, /* presumably binary_number -- confirm */ | ||
| 8772 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8773 | "utf16", /* csname */ | ||
| 8774 | "utf16_romanian_ci", /* m_coll_name */ | ||
| 8775 | "", /* comment */ | ||
| 8776 | romanian, /* tailoring */ | ||
| 8777 | nullptr, /* coll_param */ | ||
| 8778 | nullptr, /* ctype */ | ||
| 8779 | nullptr, /* to_lower */ | ||
| 8780 | nullptr, /* to_upper */ | ||
| 8781 | nullptr, /* sort_order */ | ||
| 8782 | nullptr, /* uca: not bound statically here */ | ||
| 8783 | nullptr, /* tab_to_uni */ | ||
| 8784 | nullptr, /* tab_from_uni */ | ||
| 8785 | &my_unicase_default, /* caseinfo */ | ||
| 8786 | nullptr, /* state_map */ | ||
| 8787 | nullptr, /* ident_map */ | ||
| 8788 | 8, /* strxfrm_multiply */ | ||
| 8789 | 1, /* caseup_multiply */ | ||
| 8790 | 1, /* casedn_multiply */ | ||
| 8791 | 2, /* mbminlen */ | ||
| 8792 | 4, /* mbmaxlen */ | ||
| 8793 | 1, /* mbmaxlenlen */ | ||
| 8794 | 9, /* min_sort_char */ | ||
| 8795 | 0xFFFF, /* max_sort_char */ | ||
| 8796 | ' ', /* pad char (space) */ | ||
| 8797 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8798 | 1, /* levels_for_compare */ | ||
| 8799 | &my_charset_utf16_handler, /* charset handler */ | ||
| 8800 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 8801 | PAD_SPACE}; /* pad attribute */ | ||
| 8802 | |||
| 8803 | CHARSET_INFO my_charset_utf16_slovenian_uca_ci = { /* utf16_slovenian_ci: utf16 UCA collation using the slovenian tailoring */ | ||
| 8804 | 105, /* number (presumably the collation ID -- confirm) */ | ||
| 8805 | 0, /* presumably primary_number -- confirm */ | ||
| 8806 | 0, /* presumably binary_number -- confirm */ | ||
| 8807 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8808 | "utf16", /* csname */ | ||
| 8809 | "utf16_slovenian_ci", /* m_coll_name */ | ||
| 8810 | "", /* comment */ | ||
| 8811 | slovenian, /* tailoring */ | ||
| 8812 | nullptr, /* coll_param */ | ||
| 8813 | nullptr, /* ctype */ | ||
| 8814 | nullptr, /* to_lower */ | ||
| 8815 | nullptr, /* to_upper */ | ||
| 8816 | nullptr, /* sort_order */ | ||
| 8817 | nullptr, /* uca: not bound statically here */ | ||
| 8818 | nullptr, /* tab_to_uni */ | ||
| 8819 | nullptr, /* tab_from_uni */ | ||
| 8820 | &my_unicase_default, /* caseinfo */ | ||
| 8821 | nullptr, /* state_map */ | ||
| 8822 | nullptr, /* ident_map */ | ||
| 8823 | 8, /* strxfrm_multiply */ | ||
| 8824 | 1, /* caseup_multiply */ | ||
| 8825 | 1, /* casedn_multiply */ | ||
| 8826 | 2, /* mbminlen */ | ||
| 8827 | 4, /* mbmaxlen */ | ||
| 8828 | 1, /* mbmaxlenlen */ | ||
| 8829 | 9, /* min_sort_char */ | ||
| 8830 | 0xFFFF, /* max_sort_char */ | ||
| 8831 | ' ', /* pad char (space) */ | ||
| 8832 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8833 | 1, /* levels_for_compare */ | ||
| 8834 | &my_charset_utf16_handler, /* charset handler */ | ||
| 8835 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 8836 | PAD_SPACE}; /* pad attribute */ | ||
| 8837 | |||
| 8838 | CHARSET_INFO my_charset_utf16_polish_uca_ci = { /* utf16_polish_ci: utf16 UCA collation using the polish tailoring */ | ||
| 8839 | 106, /* number (presumably the collation ID -- confirm) */ | ||
| 8840 | 0, /* presumably primary_number -- confirm */ | ||
| 8841 | 0, /* presumably binary_number -- confirm */ | ||
| 8842 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8843 | "utf16", /* csname */ | ||
| 8844 | "utf16_polish_ci", /* m_coll_name */ | ||
| 8845 | "", /* comment */ | ||
| 8846 | polish, /* tailoring */ | ||
| 8847 | nullptr, /* coll_param */ | ||
| 8848 | nullptr, /* ctype */ | ||
| 8849 | nullptr, /* to_lower */ | ||
| 8850 | nullptr, /* to_upper */ | ||
| 8851 | nullptr, /* sort_order */ | ||
| 8852 | nullptr, /* uca: not bound statically here */ | ||
| 8853 | nullptr, /* tab_to_uni */ | ||
| 8854 | nullptr, /* tab_from_uni */ | ||
| 8855 | &my_unicase_default, /* caseinfo */ | ||
| 8856 | nullptr, /* state_map */ | ||
| 8857 | nullptr, /* ident_map */ | ||
| 8858 | 8, /* strxfrm_multiply */ | ||
| 8859 | 1, /* caseup_multiply */ | ||
| 8860 | 1, /* casedn_multiply */ | ||
| 8861 | 2, /* mbminlen */ | ||
| 8862 | 4, /* mbmaxlen */ | ||
| 8863 | 1, /* mbmaxlenlen */ | ||
| 8864 | 9, /* min_sort_char */ | ||
| 8865 | 0xFFFF, /* max_sort_char */ | ||
| 8866 | ' ', /* pad char (space) */ | ||
| 8867 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8868 | 1, /* levels_for_compare */ | ||
| 8869 | &my_charset_utf16_handler, /* charset handler */ | ||
| 8870 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 8871 | PAD_SPACE}; /* pad attribute */ | ||
| 8872 | |||
| 8873 | CHARSET_INFO my_charset_utf16_estonian_uca_ci = { /* utf16_estonian_ci: utf16 UCA collation using the estonian tailoring */ | ||
| 8874 | 107, /* number (presumably the collation ID -- confirm) */ | ||
| 8875 | 0, /* presumably primary_number -- confirm */ | ||
| 8876 | 0, /* presumably binary_number -- confirm */ | ||
| 8877 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8878 | "utf16", /* csname */ | ||
| 8879 | "utf16_estonian_ci", /* m_coll_name */ | ||
| 8880 | "", /* comment */ | ||
| 8881 | estonian, /* tailoring */ | ||
| 8882 | nullptr, /* coll_param */ | ||
| 8883 | nullptr, /* ctype */ | ||
| 8884 | nullptr, /* to_lower */ | ||
| 8885 | nullptr, /* to_upper */ | ||
| 8886 | nullptr, /* sort_order */ | ||
| 8887 | nullptr, /* uca: not bound statically here */ | ||
| 8888 | nullptr, /* tab_to_uni */ | ||
| 8889 | nullptr, /* tab_from_uni */ | ||
| 8890 | &my_unicase_default, /* caseinfo */ | ||
| 8891 | nullptr, /* state_map */ | ||
| 8892 | nullptr, /* ident_map */ | ||
| 8893 | 8, /* strxfrm_multiply */ | ||
| 8894 | 1, /* caseup_multiply */ | ||
| 8895 | 1, /* casedn_multiply */ | ||
| 8896 | 2, /* mbminlen */ | ||
| 8897 | 4, /* mbmaxlen */ | ||
| 8898 | 1, /* mbmaxlenlen */ | ||
| 8899 | 9, /* min_sort_char */ | ||
| 8900 | 0xFFFF, /* max_sort_char */ | ||
| 8901 | ' ', /* pad char (space) */ | ||
| 8902 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8903 | 1, /* levels_for_compare */ | ||
| 8904 | &my_charset_utf16_handler, /* charset handler */ | ||
| 8905 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 8906 | PAD_SPACE}; /* pad attribute */ | ||
| 8907 | |||
| 8908 | CHARSET_INFO my_charset_utf16_spanish_uca_ci = { /* utf16_spanish_ci: utf16 UCA collation using the spanish tailoring */ | ||
| 8909 | 108, /* number (presumably the collation ID -- confirm) */ | ||
| 8910 | 0, /* presumably primary_number -- confirm */ | ||
| 8911 | 0, /* presumably binary_number -- confirm */ | ||
| 8912 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8913 | "utf16", /* csname */ | ||
| 8914 | "utf16_spanish_ci", /* m_coll_name */ | ||
| 8915 | "", /* comment */ | ||
| 8916 | spanish, /* tailoring */ | ||
| 8917 | nullptr, /* coll_param */ | ||
| 8918 | nullptr, /* ctype */ | ||
| 8919 | nullptr, /* to_lower */ | ||
| 8920 | nullptr, /* to_upper */ | ||
| 8921 | nullptr, /* sort_order */ | ||
| 8922 | nullptr, /* uca: not bound statically here */ | ||
| 8923 | nullptr, /* tab_to_uni */ | ||
| 8924 | nullptr, /* tab_from_uni */ | ||
| 8925 | &my_unicase_default, /* caseinfo */ | ||
| 8926 | nullptr, /* state_map */ | ||
| 8927 | nullptr, /* ident_map */ | ||
| 8928 | 8, /* strxfrm_multiply */ | ||
| 8929 | 1, /* caseup_multiply */ | ||
| 8930 | 1, /* casedn_multiply */ | ||
| 8931 | 2, /* mbminlen */ | ||
| 8932 | 4, /* mbmaxlen */ | ||
| 8933 | 1, /* mbmaxlenlen */ | ||
| 8934 | 9, /* min_sort_char */ | ||
| 8935 | 0xFFFF, /* max_sort_char */ | ||
| 8936 | ' ', /* pad char (space) */ | ||
| 8937 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8938 | 1, /* levels_for_compare */ | ||
| 8939 | &my_charset_utf16_handler, /* charset handler */ | ||
| 8940 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 8941 | PAD_SPACE}; /* pad attribute */ | ||
| 8942 | |||
| 8943 | CHARSET_INFO my_charset_utf16_swedish_uca_ci = { /* utf16_swedish_ci: utf16 UCA collation using the swedish tailoring */ | ||
| 8944 | 109, /* number (presumably the collation ID -- confirm) */ | ||
| 8945 | 0, /* presumably primary_number -- confirm */ | ||
| 8946 | 0, /* presumably binary_number -- confirm */ | ||
| 8947 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8948 | "utf16", /* csname */ | ||
| 8949 | "utf16_swedish_ci", /* m_coll_name */ | ||
| 8950 | "", /* comment */ | ||
| 8951 | swedish, /* tailoring */ | ||
| 8952 | nullptr, /* coll_param */ | ||
| 8953 | nullptr, /* ctype */ | ||
| 8954 | nullptr, /* to_lower */ | ||
| 8955 | nullptr, /* to_upper */ | ||
| 8956 | nullptr, /* sort_order */ | ||
| 8957 | nullptr, /* uca: not bound statically here */ | ||
| 8958 | nullptr, /* tab_to_uni */ | ||
| 8959 | nullptr, /* tab_from_uni */ | ||
| 8960 | &my_unicase_default, /* caseinfo */ | ||
| 8961 | nullptr, /* state_map */ | ||
| 8962 | nullptr, /* ident_map */ | ||
| 8963 | 8, /* strxfrm_multiply */ | ||
| 8964 | 1, /* caseup_multiply */ | ||
| 8965 | 1, /* casedn_multiply */ | ||
| 8966 | 2, /* mbminlen */ | ||
| 8967 | 4, /* mbmaxlen */ | ||
| 8968 | 1, /* mbmaxlenlen */ | ||
| 8969 | 9, /* min_sort_char */ | ||
| 8970 | 0xFFFF, /* max_sort_char */ | ||
| 8971 | ' ', /* pad char (space) */ | ||
| 8972 | false, /* escape_with_backslash_is_dangerous */ | ||
| 8973 | 1, /* levels_for_compare */ | ||
| 8974 | &my_charset_utf16_handler, /* charset handler */ | ||
| 8975 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 8976 | PAD_SPACE}; /* pad attribute */ | ||
| 8977 | |||
| 8978 | CHARSET_INFO my_charset_utf16_turkish_uca_ci = { /* utf16_turkish_ci: utf16 UCA collation using the turkish tailoring */ | ||
| 8979 | 110, /* number (presumably the collation ID -- confirm) */ | ||
| 8980 | 0, /* presumably primary_number -- confirm */ | ||
| 8981 | 0, /* presumably binary_number -- confirm */ | ||
| 8982 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 8983 | "utf16", /* csname */ | ||
| 8984 | "utf16_turkish_ci", /* m_coll_name */ | ||
| 8985 | "", /* comment */ | ||
| 8986 | turkish, /* tailoring */ | ||
| 8987 | nullptr, /* coll_param */ | ||
| 8988 | nullptr, /* ctype */ | ||
| 8989 | nullptr, /* to_lower */ | ||
| 8990 | nullptr, /* to_upper */ | ||
| 8991 | nullptr, /* sort_order */ | ||
| 8992 | nullptr, /* uca: not bound statically here */ | ||
| 8993 | nullptr, /* tab_to_uni */ | ||
| 8994 | nullptr, /* tab_from_uni */ | ||
| 8995 | &my_unicase_turkish, /* caseinfo: Turkish case table, unlike the sibling utf16 collations that use my_unicase_default */ | ||
| 8996 | nullptr, /* state_map */ | ||
| 8997 | nullptr, /* ident_map */ | ||
| 8998 | 8, /* strxfrm_multiply */ | ||
| 8999 | 1, /* caseup_multiply */ | ||
| 9000 | 1, /* casedn_multiply */ | ||
| 9001 | 2, /* mbminlen */ | ||
| 9002 | 4, /* mbmaxlen */ | ||
| 9003 | 1, /* mbmaxlenlen */ | ||
| 9004 | 9, /* min_sort_char */ | ||
| 9005 | 0xFFFF, /* max_sort_char */ | ||
| 9006 | ' ', /* pad char (space) */ | ||
| 9007 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9008 | 1, /* levels_for_compare */ | ||
| 9009 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9010 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9011 | PAD_SPACE}; /* pad attribute */ | ||
| 9012 | |||
| 9013 | CHARSET_INFO my_charset_utf16_czech_uca_ci = { /* utf16_czech_ci: utf16 UCA collation using the czech tailoring */ | ||
| 9014 | 111, /* number (presumably the collation ID -- confirm) */ | ||
| 9015 | 0, /* presumably primary_number -- confirm */ | ||
| 9016 | 0, /* presumably binary_number -- confirm */ | ||
| 9017 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9018 | "utf16", /* csname */ | ||
| 9019 | "utf16_czech_ci", /* m_coll_name */ | ||
| 9020 | "", /* comment */ | ||
| 9021 | czech, /* tailoring */ | ||
| 9022 | nullptr, /* coll_param */ | ||
| 9023 | nullptr, /* ctype */ | ||
| 9024 | nullptr, /* to_lower */ | ||
| 9025 | nullptr, /* to_upper */ | ||
| 9026 | nullptr, /* sort_order */ | ||
| 9027 | nullptr, /* uca: not bound statically here */ | ||
| 9028 | nullptr, /* tab_to_uni */ | ||
| 9029 | nullptr, /* tab_from_uni */ | ||
| 9030 | &my_unicase_default, /* caseinfo */ | ||
| 9031 | nullptr, /* state_map */ | ||
| 9032 | nullptr, /* ident_map */ | ||
| 9033 | 8, /* strxfrm_multiply */ | ||
| 9034 | 1, /* caseup_multiply */ | ||
| 9035 | 1, /* casedn_multiply */ | ||
| 9036 | 2, /* mbminlen */ | ||
| 9037 | 4, /* mbmaxlen */ | ||
| 9038 | 1, /* mbmaxlenlen */ | ||
| 9039 | 9, /* min_sort_char */ | ||
| 9040 | 0xFFFF, /* max_sort_char */ | ||
| 9041 | ' ', /* pad char (space) */ | ||
| 9042 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9043 | 1, /* levels_for_compare */ | ||
| 9044 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9045 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9046 | PAD_SPACE}; /* pad attribute */ | ||
| 9047 | |||
| 9048 | CHARSET_INFO my_charset_utf16_danish_uca_ci = { /* utf16_danish_ci: utf16 UCA collation using the danish tailoring */ | ||
| 9049 | 112, /* number (presumably the collation ID -- confirm) */ | ||
| 9050 | 0, /* presumably primary_number -- confirm */ | ||
| 9051 | 0, /* presumably binary_number -- confirm */ | ||
| 9052 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9053 | "utf16", /* csname */ | ||
| 9054 | "utf16_danish_ci", /* m_coll_name */ | ||
| 9055 | "", /* comment */ | ||
| 9056 | danish, /* tailoring */ | ||
| 9057 | nullptr, /* coll_param */ | ||
| 9058 | nullptr, /* ctype */ | ||
| 9059 | nullptr, /* to_lower */ | ||
| 9060 | nullptr, /* to_upper */ | ||
| 9061 | nullptr, /* sort_order */ | ||
| 9062 | nullptr, /* uca: not bound statically here */ | ||
| 9063 | nullptr, /* tab_to_uni */ | ||
| 9064 | nullptr, /* tab_from_uni */ | ||
| 9065 | &my_unicase_default, /* caseinfo */ | ||
| 9066 | nullptr, /* state_map */ | ||
| 9067 | nullptr, /* ident_map */ | ||
| 9068 | 8, /* strxfrm_multiply */ | ||
| 9069 | 1, /* caseup_multiply */ | ||
| 9070 | 1, /* casedn_multiply */ | ||
| 9071 | 2, /* mbminlen */ | ||
| 9072 | 4, /* mbmaxlen */ | ||
| 9073 | 1, /* mbmaxlenlen */ | ||
| 9074 | 9, /* min_sort_char */ | ||
| 9075 | 0xFFFF, /* max_sort_char */ | ||
| 9076 | ' ', /* pad char (space) */ | ||
| 9077 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9078 | 1, /* levels_for_compare */ | ||
| 9079 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9080 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9081 | PAD_SPACE}; /* pad attribute */ | ||
| 9082 | |||
| 9083 | CHARSET_INFO my_charset_utf16_lithuanian_uca_ci = { /* utf16_lithuanian_ci: utf16 UCA collation using the lithuanian tailoring */ | ||
| 9084 | 113, /* number (presumably the collation ID -- confirm) */ | ||
| 9085 | 0, /* presumably primary_number -- confirm */ | ||
| 9086 | 0, /* presumably binary_number -- confirm */ | ||
| 9087 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9088 | "utf16", /* csname */ | ||
| 9089 | "utf16_lithuanian_ci", /* m_coll_name */ | ||
| 9090 | "", /* comment */ | ||
| 9091 | lithuanian, /* tailoring */ | ||
| 9092 | nullptr, /* coll_param */ | ||
| 9093 | nullptr, /* ctype */ | ||
| 9094 | nullptr, /* to_lower */ | ||
| 9095 | nullptr, /* to_upper */ | ||
| 9096 | nullptr, /* sort_order */ | ||
| 9097 | nullptr, /* uca: not bound statically here */ | ||
| 9098 | nullptr, /* tab_to_uni */ | ||
| 9099 | nullptr, /* tab_from_uni */ | ||
| 9100 | &my_unicase_default, /* caseinfo */ | ||
| 9101 | nullptr, /* state_map */ | ||
| 9102 | nullptr, /* ident_map */ | ||
| 9103 | 8, /* strxfrm_multiply */ | ||
| 9104 | 1, /* caseup_multiply */ | ||
| 9105 | 1, /* casedn_multiply */ | ||
| 9106 | 2, /* mbminlen */ | ||
| 9107 | 4, /* mbmaxlen */ | ||
| 9108 | 1, /* mbmaxlenlen */ | ||
| 9109 | 9, /* min_sort_char */ | ||
| 9110 | 0xFFFF, /* max_sort_char */ | ||
| 9111 | ' ', /* pad char (space) */ | ||
| 9112 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9113 | 1, /* levels_for_compare */ | ||
| 9114 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9115 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9116 | PAD_SPACE}; /* pad attribute */ | ||
| 9117 | |||
| 9118 | CHARSET_INFO my_charset_utf16_slovak_uca_ci = { /* utf16_slovak_ci: utf16 UCA collation using the slovak tailoring */ | ||
| 9119 | 114, /* number (presumably the collation ID -- confirm) */ | ||
| 9120 | 0, /* presumably primary_number -- confirm */ | ||
| 9121 | 0, /* presumably binary_number -- confirm */ | ||
| 9122 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9123 | "utf16", /* csname */ | ||
| 9124 | "utf16_slovak_ci", /* m_coll_name */ | ||
| 9125 | "", /* comment */ | ||
| 9126 | slovak, /* tailoring */ | ||
| 9127 | nullptr, /* coll_param */ | ||
| 9128 | nullptr, /* ctype */ | ||
| 9129 | nullptr, /* to_lower */ | ||
| 9130 | nullptr, /* to_upper */ | ||
| 9131 | nullptr, /* sort_order */ | ||
| 9132 | nullptr, /* uca: not bound statically here */ | ||
| 9133 | nullptr, /* tab_to_uni */ | ||
| 9134 | nullptr, /* tab_from_uni */ | ||
| 9135 | &my_unicase_default, /* caseinfo */ | ||
| 9136 | nullptr, /* state_map */ | ||
| 9137 | nullptr, /* ident_map */ | ||
| 9138 | 8, /* strxfrm_multiply */ | ||
| 9139 | 1, /* caseup_multiply */ | ||
| 9140 | 1, /* casedn_multiply */ | ||
| 9141 | 2, /* mbminlen */ | ||
| 9142 | 4, /* mbmaxlen */ | ||
| 9143 | 1, /* mbmaxlenlen */ | ||
| 9144 | 9, /* min_sort_char */ | ||
| 9145 | 0xFFFF, /* max_sort_char */ | ||
| 9146 | ' ', /* pad char (space) */ | ||
| 9147 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9148 | 1, /* levels_for_compare */ | ||
| 9149 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9150 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9151 | PAD_SPACE}; /* pad attribute */ | ||
| 9152 | |||
| 9153 | CHARSET_INFO my_charset_utf16_spanish2_uca_ci = { /* utf16_spanish2_ci: utf16 UCA collation using the spanish2 tailoring */ | ||
| 9154 | 115, /* number (presumably the collation ID -- confirm) */ | ||
| 9155 | 0, /* presumably primary_number -- confirm */ | ||
| 9156 | 0, /* presumably binary_number -- confirm */ | ||
| 9157 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9158 | "utf16", /* csname */ | ||
| 9159 | "utf16_spanish2_ci", /* m_coll_name */ | ||
| 9160 | "", /* comment */ | ||
| 9161 | spanish2, /* tailoring */ | ||
| 9162 | nullptr, /* coll_param */ | ||
| 9163 | nullptr, /* ctype */ | ||
| 9164 | nullptr, /* to_lower */ | ||
| 9165 | nullptr, /* to_upper */ | ||
| 9166 | nullptr, /* sort_order */ | ||
| 9167 | nullptr, /* uca: not bound statically here */ | ||
| 9168 | nullptr, /* tab_to_uni */ | ||
| 9169 | nullptr, /* tab_from_uni */ | ||
| 9170 | &my_unicase_default, /* caseinfo */ | ||
| 9171 | nullptr, /* state_map */ | ||
| 9172 | nullptr, /* ident_map */ | ||
| 9173 | 8, /* strxfrm_multiply */ | ||
| 9174 | 1, /* caseup_multiply */ | ||
| 9175 | 1, /* casedn_multiply */ | ||
| 9176 | 2, /* mbminlen */ | ||
| 9177 | 4, /* mbmaxlen */ | ||
| 9178 | 1, /* mbmaxlenlen */ | ||
| 9179 | 9, /* min_sort_char */ | ||
| 9180 | 0xFFFF, /* max_sort_char */ | ||
| 9181 | ' ', /* pad char (space) */ | ||
| 9182 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9183 | 1, /* levels_for_compare */ | ||
| 9184 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9185 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9186 | PAD_SPACE}; /* pad attribute */ | ||
| 9187 | |||
| 9188 | CHARSET_INFO my_charset_utf16_roman_uca_ci = { /* utf16_roman_ci: utf16 UCA collation using the roman tailoring */ | ||
| 9189 | 116, /* number (presumably the collation ID -- confirm) */ | ||
| 9190 | 0, /* presumably primary_number -- confirm */ | ||
| 9191 | 0, /* presumably binary_number -- confirm */ | ||
| 9192 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9193 | "utf16", /* csname */ | ||
| 9194 | "utf16_roman_ci", /* m_coll_name */ | ||
| 9195 | "", /* comment */ | ||
| 9196 | roman, /* tailoring */ | ||
| 9197 | nullptr, /* coll_param */ | ||
| 9198 | nullptr, /* ctype */ | ||
| 9199 | nullptr, /* to_lower */ | ||
| 9200 | nullptr, /* to_upper */ | ||
| 9201 | nullptr, /* sort_order */ | ||
| 9202 | nullptr, /* uca: not bound statically here */ | ||
| 9203 | nullptr, /* tab_to_uni */ | ||
| 9204 | nullptr, /* tab_from_uni */ | ||
| 9205 | &my_unicase_default, /* caseinfo */ | ||
| 9206 | nullptr, /* state_map */ | ||
| 9207 | nullptr, /* ident_map */ | ||
| 9208 | 8, /* strxfrm_multiply */ | ||
| 9209 | 1, /* caseup_multiply */ | ||
| 9210 | 1, /* casedn_multiply */ | ||
| 9211 | 2, /* mbminlen */ | ||
| 9212 | 4, /* mbmaxlen */ | ||
| 9213 | 1, /* mbmaxlenlen */ | ||
| 9214 | 9, /* min_sort_char */ | ||
| 9215 | 0xFFFF, /* max_sort_char */ | ||
| 9216 | ' ', /* pad char (space) */ | ||
| 9217 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9218 | 1, /* levels_for_compare */ | ||
| 9219 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9220 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9221 | PAD_SPACE}; /* pad attribute */ | ||
| 9222 | |||
| 9223 | CHARSET_INFO my_charset_utf16_persian_uca_ci = { /* utf16_persian_ci: utf16 UCA collation using the persian tailoring */ | ||
| 9224 | 117, /* number (presumably the collation ID -- confirm) */ | ||
| 9225 | 0, /* presumably primary_number -- confirm */ | ||
| 9226 | 0, /* presumably binary_number -- confirm */ | ||
| 9227 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9228 | "utf16", /* csname */ | ||
| 9229 | "utf16_persian_ci", /* m_coll_name */ | ||
| 9230 | "", /* comment */ | ||
| 9231 | persian, /* tailoring */ | ||
| 9232 | nullptr, /* coll_param */ | ||
| 9233 | nullptr, /* ctype */ | ||
| 9234 | nullptr, /* to_lower */ | ||
| 9235 | nullptr, /* to_upper */ | ||
| 9236 | nullptr, /* sort_order */ | ||
| 9237 | nullptr, /* uca: not bound statically here */ | ||
| 9238 | nullptr, /* tab_to_uni */ | ||
| 9239 | nullptr, /* tab_from_uni */ | ||
| 9240 | &my_unicase_default, /* caseinfo */ | ||
| 9241 | nullptr, /* state_map */ | ||
| 9242 | nullptr, /* ident_map */ | ||
| 9243 | 8, /* strxfrm_multiply */ | ||
| 9244 | 1, /* caseup_multiply */ | ||
| 9245 | 1, /* casedn_multiply */ | ||
| 9246 | 2, /* mbminlen */ | ||
| 9247 | 4, /* mbmaxlen */ | ||
| 9248 | 1, /* mbmaxlenlen */ | ||
| 9249 | 9, /* min_sort_char */ | ||
| 9250 | 0xFFFF, /* max_sort_char */ | ||
| 9251 | ' ', /* pad char (space) */ | ||
| 9252 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9253 | 1, /* levels_for_compare */ | ||
| 9254 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9255 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9256 | PAD_SPACE}; /* pad attribute */ | ||
| 9257 | |||
| 9258 | CHARSET_INFO my_charset_utf16_esperanto_uca_ci = { /* utf16_esperanto_ci: utf16 UCA collation using the esperanto tailoring */ | ||
| 9259 | 118, /* number (presumably the collation ID -- confirm) */ | ||
| 9260 | 0, /* presumably primary_number -- confirm */ | ||
| 9261 | 0, /* presumably binary_number -- confirm */ | ||
| 9262 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9263 | "utf16", /* csname */ | ||
| 9264 | "utf16_esperanto_ci", /* m_coll_name */ | ||
| 9265 | "", /* comment */ | ||
| 9266 | esperanto, /* tailoring */ | ||
| 9267 | nullptr, /* coll_param */ | ||
| 9268 | nullptr, /* ctype */ | ||
| 9269 | nullptr, /* to_lower */ | ||
| 9270 | nullptr, /* to_upper */ | ||
| 9271 | nullptr, /* sort_order */ | ||
| 9272 | nullptr, /* uca: not bound statically here */ | ||
| 9273 | nullptr, /* tab_to_uni */ | ||
| 9274 | nullptr, /* tab_from_uni */ | ||
| 9275 | &my_unicase_default, /* caseinfo */ | ||
| 9276 | nullptr, /* state_map */ | ||
| 9277 | nullptr, /* ident_map */ | ||
| 9278 | 8, /* strxfrm_multiply */ | ||
| 9279 | 1, /* caseup_multiply */ | ||
| 9280 | 1, /* casedn_multiply */ | ||
| 9281 | 2, /* mbminlen */ | ||
| 9282 | 4, /* mbmaxlen */ | ||
| 9283 | 1, /* mbmaxlenlen */ | ||
| 9284 | 9, /* min_sort_char */ | ||
| 9285 | 0xFFFF, /* max_sort_char */ | ||
| 9286 | ' ', /* pad char (space) */ | ||
| 9287 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9288 | 1, /* levels_for_compare */ | ||
| 9289 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9290 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9291 | PAD_SPACE}; /* pad attribute */ | ||
| 9292 | |||
| 9293 | CHARSET_INFO my_charset_utf16_hungarian_uca_ci = { /* utf16_hungarian_ci: utf16 UCA collation using the hungarian tailoring */ | ||
| 9294 | 119, /* number (presumably the collation ID -- confirm) */ | ||
| 9295 | 0, /* presumably primary_number -- confirm */ | ||
| 9296 | 0, /* presumably binary_number -- confirm */ | ||
| 9297 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9298 | "utf16", /* csname */ | ||
| 9299 | "utf16_hungarian_ci", /* m_coll_name */ | ||
| 9300 | "", /* comment */ | ||
| 9301 | hungarian, /* tailoring */ | ||
| 9302 | nullptr, /* coll_param */ | ||
| 9303 | nullptr, /* ctype */ | ||
| 9304 | nullptr, /* to_lower */ | ||
| 9305 | nullptr, /* to_upper */ | ||
| 9306 | nullptr, /* sort_order */ | ||
| 9307 | nullptr, /* uca: not bound statically here */ | ||
| 9308 | nullptr, /* tab_to_uni */ | ||
| 9309 | nullptr, /* tab_from_uni */ | ||
| 9310 | &my_unicase_default, /* caseinfo */ | ||
| 9311 | nullptr, /* state_map */ | ||
| 9312 | nullptr, /* ident_map */ | ||
| 9313 | 8, /* strxfrm_multiply */ | ||
| 9314 | 1, /* caseup_multiply */ | ||
| 9315 | 1, /* casedn_multiply */ | ||
| 9316 | 2, /* mbminlen */ | ||
| 9317 | 4, /* mbmaxlen */ | ||
| 9318 | 1, /* mbmaxlenlen */ | ||
| 9319 | 9, /* min_sort_char */ | ||
| 9320 | 0xFFFF, /* max_sort_char */ | ||
| 9321 | ' ', /* pad char (space) */ | ||
| 9322 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9323 | 1, /* levels_for_compare */ | ||
| 9324 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9325 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9326 | PAD_SPACE}; /* pad attribute */ | ||
| 9327 | |||
| 9328 | CHARSET_INFO my_charset_utf16_sinhala_uca_ci = { /* utf16_sinhala_ci: utf16 UCA collation using the sinhala tailoring */ | ||
| 9329 | 120, /* number (presumably the collation ID -- confirm) */ | ||
| 9330 | 0, /* presumably primary_number -- confirm */ | ||
| 9331 | 0, /* presumably binary_number -- confirm */ | ||
| 9332 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9333 | "utf16", /* csname */ | ||
| 9334 | "utf16_sinhala_ci", /* m_coll_name */ | ||
| 9335 | "", /* comment */ | ||
| 9336 | sinhala, /* tailoring */ | ||
| 9337 | nullptr, /* coll_param */ | ||
| 9338 | nullptr, /* ctype */ | ||
| 9339 | nullptr, /* to_lower */ | ||
| 9340 | nullptr, /* to_upper */ | ||
| 9341 | nullptr, /* sort_order */ | ||
| 9342 | nullptr, /* uca: not bound statically here */ | ||
| 9343 | nullptr, /* tab_to_uni */ | ||
| 9344 | nullptr, /* tab_from_uni */ | ||
| 9345 | &my_unicase_default, /* caseinfo */ | ||
| 9346 | nullptr, /* state_map */ | ||
| 9347 | nullptr, /* ident_map */ | ||
| 9348 | 8, /* strxfrm_multiply */ | ||
| 9349 | 1, /* caseup_multiply */ | ||
| 9350 | 1, /* casedn_multiply */ | ||
| 9351 | 2, /* mbminlen */ | ||
| 9352 | 4, /* mbmaxlen */ | ||
| 9353 | 1, /* mbmaxlenlen */ | ||
| 9354 | 9, /* min_sort_char */ | ||
| 9355 | 0xFFFF, /* max_sort_char */ | ||
| 9356 | ' ', /* pad char (space) */ | ||
| 9357 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9358 | 1, /* levels_for_compare */ | ||
| 9359 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9360 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9361 | PAD_SPACE}; /* pad attribute */ | ||
| 9362 | |||
| 9363 | CHARSET_INFO my_charset_utf16_german2_uca_ci = { /* utf16_german2_ci: utf16 UCA collation using the german2 tailoring */ | ||
| 9364 | 121, /* number (presumably the collation ID -- confirm) */ | ||
| 9365 | 0, /* presumably primary_number -- confirm */ | ||
| 9366 | 0, /* presumably binary_number -- confirm */ | ||
| 9367 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9368 | "utf16", /* csname */ | ||
| 9369 | "utf16_german2_ci", /* m_coll_name */ | ||
| 9370 | "", /* comment */ | ||
| 9371 | german2, /* tailoring */ | ||
| 9372 | nullptr, /* coll_param */ | ||
| 9373 | nullptr, /* ctype */ | ||
| 9374 | nullptr, /* to_lower */ | ||
| 9375 | nullptr, /* to_upper */ | ||
| 9376 | nullptr, /* sort_order */ | ||
| 9377 | nullptr, /* uca: not bound statically here */ | ||
| 9378 | nullptr, /* tab_to_uni */ | ||
| 9379 | nullptr, /* tab_from_uni */ | ||
| 9380 | &my_unicase_default, /* caseinfo */ | ||
| 9381 | nullptr, /* state_map */ | ||
| 9382 | nullptr, /* ident_map */ | ||
| 9383 | 8, /* strxfrm_multiply */ | ||
| 9384 | 1, /* caseup_multiply */ | ||
| 9385 | 1, /* casedn_multiply */ | ||
| 9386 | 2, /* mbminlen */ | ||
| 9387 | 4, /* mbmaxlen */ | ||
| 9388 | 1, /* mbmaxlenlen */ | ||
| 9389 | 9, /* min_sort_char */ | ||
| 9390 | 0xFFFF, /* max_sort_char */ | ||
| 9391 | ' ', /* pad char (space) */ | ||
| 9392 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9393 | 1, /* levels_for_compare */ | ||
| 9394 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9395 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9396 | PAD_SPACE}; /* pad attribute */ | ||
| 9397 | |||
| 9398 | CHARSET_INFO my_charset_utf16_croatian_uca_ci = { /* utf16_croatian_ci: utf16 UCA collation using the croatian tailoring */ | ||
| 9399 | 122, /* number (presumably the collation ID -- confirm) */ | ||
| 9400 | 0, /* presumably primary_number -- confirm */ | ||
| 9401 | 0, /* presumably binary_number -- confirm */ | ||
| 9402 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9403 | "utf16", /* csname */ | ||
| 9404 | "utf16_croatian_ci", /* m_coll_name */ | ||
| 9405 | "", /* comment */ | ||
| 9406 | croatian, /* tailoring */ | ||
| 9407 | nullptr, /* coll_param */ | ||
| 9408 | nullptr, /* ctype */ | ||
| 9409 | nullptr, /* to_lower */ | ||
| 9410 | nullptr, /* to_upper */ | ||
| 9411 | nullptr, /* sort_order */ | ||
| 9412 | nullptr, /* uca: not bound statically here */ | ||
| 9413 | nullptr, /* tab_to_uni */ | ||
| 9414 | nullptr, /* tab_from_uni */ | ||
| 9415 | &my_unicase_default, /* caseinfo */ | ||
| 9416 | nullptr, /* state_map */ | ||
| 9417 | nullptr, /* ident_map */ | ||
| 9418 | 8, /* strxfrm_multiply */ | ||
| 9419 | 1, /* caseup_multiply */ | ||
| 9420 | 1, /* casedn_multiply */ | ||
| 9421 | 2, /* mbminlen */ | ||
| 9422 | 4, /* mbmaxlen */ | ||
| 9423 | 1, /* mbmaxlenlen */ | ||
| 9424 | 9, /* min_sort_char */ | ||
| 9425 | 0xFFFF, /* max_sort_char */ | ||
| 9426 | ' ', /* pad char (space) */ | ||
| 9427 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9428 | 1, /* levels_for_compare */ | ||
| 9429 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9430 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9431 | PAD_SPACE}; /* pad attribute */ | ||
| 9432 | |||
| 9433 | CHARSET_INFO my_charset_utf16_unicode_520_ci = { /* utf16_unicode_520_ci: utf16 UCA collation bound to my_uca_v520, with an empty tailoring */ | ||
| 9434 | 123, /* number (presumably the collation ID -- confirm) */ | ||
| 9435 | 0, /* presumably primary_number -- confirm */ | ||
| 9436 | 0, /* presumably binary_number -- confirm */ | ||
| 9437 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII, /* state: flags spelled out here instead of MY_CS_UTF16_UCA_FLAGS */ | ||
| 9438 | "utf16", /* csname */ | ||
| 9439 | "utf16_unicode_520_ci", /* m_coll_name */ | ||
| 9440 | "", /* comment */ | ||
| 9441 | "", /* tailoring: empty string */ | ||
| 9442 | nullptr, /* coll_param */ | ||
| 9443 | nullptr, /* ctype */ | ||
| 9444 | nullptr, /* to_lower */ | ||
| 9445 | nullptr, /* to_upper */ | ||
| 9446 | nullptr, /* sort_order */ | ||
| 9447 | &my_uca_v520, /* uca: bound statically, unlike the tailored utf16 collations */ | ||
| 9448 | nullptr, /* tab_to_uni */ | ||
| 9449 | nullptr, /* tab_from_uni */ | ||
| 9450 | &my_unicase_unicode520, /* caseinfo */ | ||
| 9451 | nullptr, /* state_map */ | ||
| 9452 | nullptr, /* ident_map */ | ||
| 9453 | 8, /* strxfrm_multiply */ | ||
| 9454 | 1, /* caseup_multiply */ | ||
| 9455 | 1, /* casedn_multiply */ | ||
| 9456 | 2, /* mbminlen */ | ||
| 9457 | 4, /* mbmaxlen */ | ||
| 9458 | 1, /* mbmaxlenlen */ | ||
| 9459 | 9, /* min_sort_char */ | ||
| 9460 | 0x10FFFF, /* max_sort_char: 0x10FFFF here, 0xFFFF in the tailored utf16 collations */ | ||
| 9461 | 0x20, /* pad char (space, written as ' ' in the sibling definitions) */ | ||
| 9462 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9463 | 1, /* levels_for_compare */ | ||
| 9464 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9465 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9466 | PAD_SPACE}; /* pad attribute */ | ||
| 9467 | |||
| 9468 | CHARSET_INFO my_charset_utf16_vietnamese_ci = { /* utf16_vietnamese_ci: utf16 UCA collation using the vietnamese tailoring */ | ||
| 9469 | 124, /* number (presumably the collation ID -- confirm) */ | ||
| 9470 | 0, /* presumably primary_number -- confirm */ | ||
| 9471 | 0, /* presumably binary_number -- confirm */ | ||
| 9472 | MY_CS_UTF16_UCA_FLAGS, /* state */ | ||
| 9473 | "utf16", /* csname */ | ||
| 9474 | "utf16_vietnamese_ci", /* m_coll_name */ | ||
| 9475 | "", /* comment */ | ||
| 9476 | vietnamese, /* tailoring */ | ||
| 9477 | nullptr, /* coll_param */ | ||
| 9478 | nullptr, /* ctype */ | ||
| 9479 | nullptr, /* to_lower */ | ||
| 9480 | nullptr, /* to_upper */ | ||
| 9481 | nullptr, /* sort_order */ | ||
| 9482 | nullptr, /* uca: not bound statically here */ | ||
| 9483 | nullptr, /* tab_to_uni */ | ||
| 9484 | nullptr, /* tab_from_uni */ | ||
| 9485 | &my_unicase_default, /* caseinfo */ | ||
| 9486 | nullptr, /* state_map */ | ||
| 9487 | nullptr, /* ident_map */ | ||
| 9488 | 8, /* strxfrm_multiply */ | ||
| 9489 | 1, /* caseup_multiply */ | ||
| 9490 | 1, /* casedn_multiply */ | ||
| 9491 | 2, /* mbminlen */ | ||
| 9492 | 4, /* mbmaxlen */ | ||
| 9493 | 1, /* mbmaxlenlen */ | ||
| 9494 | 9, /* min_sort_char */ | ||
| 9495 | 0xFFFF, /* max_sort_char */ | ||
| 9496 | ' ', /* pad char (space) */ | ||
| 9497 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9498 | 1, /* levels_for_compare */ | ||
| 9499 | &my_charset_utf16_handler, /* charset handler */ | ||
| 9500 | &my_collation_utf16_uca_handler, /* collation handler */ | ||
| 9501 | PAD_SPACE}; /* pad attribute */ | ||
| 9502 | |||
| 9503 | MY_COLLATION_HANDLER my_collation_gb18030_uca_handler = { /* collation callbacks referenced by my_charset_gb18030_unicode_520_ci */ | ||
| 9504 | my_coll_init_uca, /* init */ | ||
| 9505 | my_coll_uninit_uca, /* uninit */ | ||
| 9506 | my_strnncoll_any_uca, /* strnncoll */ | ||
| 9507 | my_strnncollsp_any_uca, /* strnncollsp */ | ||
| 9508 | my_strnxfrm_any_uca, /* strnxfrm */ | ||
| 9509 | my_strnxfrmlen_simple, /* strnxfrmlen */ | ||
| 9510 | my_like_range_mb, /* like_range */ | ||
| 9511 | my_wildcmp_uca, /* wildcmp */ | ||
| 9512 | nullptr, /* slot left unset; NOTE(review): presumably strcasecmp -- confirm against MY_COLLATION_HANDLER */ | ||
| 9513 | my_instr_mb, /* instr */ | ||
| 9514 | my_hash_sort_any_uca, /* hash_sort */ | ||
| 9515 | my_propagate_complex}; /* propagate */ | ||
| 9516 | |||
| 9517 | /** | ||
| 9518 | Character-type bit masks for gb18030, one entry per byte value. | ||
| 9519 | Entry 0 is reserved for EOF (-1), so byte c is looked up as | ||
| 9520 | ctype[c + 1] (257 entries in total). See strings/CHARSET_INFO.txt. | ||
| 9521 | */ | ||
| 9522 | static const uchar ctype_gb18030[257] = { | ||
| 9523 | 0, /* ctype[0]: EOF slot, for standard library */ | ||
| 9524 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, /* bytes 0x00-0x0F */ | ||
| 9525 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, /* bytes 0x10-0x1F */ | ||
| 9526 | 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* bytes 0x20-0x2F */ | ||
| 9527 | 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16, 16, /* bytes 0x30-0x3F */ | ||
| 9528 | 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* bytes 0x40-0x4F */ | ||
| 9529 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, /* bytes 0x50-0x5F */ | ||
| 9530 | 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* bytes 0x60-0x6F */ | ||
| 9531 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, /* bytes 0x70-0x7F */ | ||
| 9532 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0x80-0x8F */ | ||
| 9533 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0x90-0x9F */ | ||
| 9534 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xA0-0xAF */ | ||
| 9535 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xB0-0xBF */ | ||
| 9536 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xC0-0xCF */ | ||
| 9537 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xD0-0xDF */ | ||
| 9538 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* bytes 0xE0-0xEF */ | ||
| 9539 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0}; /* bytes 0xF0-0xFF */ | ||
| 9540 | |||
| 9541 | extern MY_CHARSET_HANDLER my_charset_gb18030_uca_handler; /* declared only; not defined in this part of the file; used by my_charset_gb18030_unicode_520_ci */ | ||
| 9542 | |||
| 9543 | CHARSET_INFO my_charset_gb18030_unicode_520_ci = { /* gb18030_unicode_520_ci: gb18030 UCA collation bound to my_uca_v520, with an empty tailoring */ | ||
| 9544 | 250, /* number (presumably the collation ID -- confirm) */ | ||
| 9545 | 0, /* presumably primary_number -- confirm */ | ||
| 9546 | 0, /* presumably binary_number -- confirm */ | ||
| 9547 | MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_NONASCII, /* state */ | ||
| 9548 | "gb18030", /* csname */ | ||
| 9549 | "gb18030_unicode_520_ci", /* m_coll_name */ | ||
| 9550 | "China National Standard GB18030", /* comment */ | ||
| 9551 | "", /* tailoring: empty string */ | ||
| 9552 | nullptr, /* coll_param */ | ||
| 9553 | ctype_gb18030, /* ctype */ | ||
| 9554 | nullptr, /* to_lower */ | ||
| 9555 | nullptr, /* to_upper */ | ||
| 9556 | nullptr, /* sort_order */ | ||
| 9557 | &my_uca_v520, /* uca */ | ||
| 9558 | nullptr, /* tab_to_uni */ | ||
| 9559 | nullptr, /* tab_from_uni */ | ||
| 9560 | &my_unicase_unicode520, /* caseinfo */ | ||
| 9561 | nullptr, /* state_map */ | ||
| 9562 | nullptr, /* ident_map */ | ||
| 9563 | 8, /* strxfrm_multiply */ | ||
| 9564 | 2, /* caseup_multiply */ | ||
| 9565 | 2, /* casedn_multiply */ | ||
| 9566 | 1, /* mbminlen */ | ||
| 9567 | 4, /* mbmaxlen */ | ||
| 9568 | 2, /* mbmaxlenlen */ | ||
| 9569 | 0, /* min_sort_char */ | ||
| 9570 | 0xE3329A35, /* max_sort_char */ | ||
| 9571 | ' ', /* pad char (space) */ | ||
| 9572 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9573 | 1, /* levels_for_compare */ | ||
| 9574 | &my_charset_gb18030_uca_handler, /* charset handler */ | ||
| 9575 | &my_collation_gb18030_uca_handler, /* collation handler */ | ||
| 9576 | PAD_SPACE}; /* pad attribute */ | ||
| 9577 | |||
| 9578 | CHARSET_INFO my_charset_utf8mb4_0900_ai_ci = { /* Collation 255 (state includes MY_CS_PRIMARY): no tailoring, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9579 | 255, | ||
| 9580 | 0, | ||
| 9581 | 0, /* number */ | ||
| 9582 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_PRIMARY, /* state */ | ||
| 9583 | MY_UTF8MB4, /* csname */ | ||
| 9584 | MY_UTF8MB4 "_0900_ai_ci", /* m_coll_name */ | ||
| 9585 | "UTF-8 Unicode", /* comment */ | ||
| 9586 | nullptr, /* tailoring */ | ||
| 9587 | nullptr, /* coll_param */ | ||
| 9588 | ctype_utf8, /* ctype */ | ||
| 9589 | nullptr, /* to_lower */ | ||
| 9590 | nullptr, /* to_upper */ | ||
| 9591 | nullptr, /* sort_order */ | ||
| 9592 | &my_uca_v900, /* uca */ | ||
| 9593 | nullptr, /* tab_to_uni */ | ||
| 9594 | nullptr, /* tab_from_uni */ | ||
| 9595 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9596 | nullptr, /* state_map */ | ||
| 9597 | nullptr, /* ident_map */ | ||
| 9598 | 0, /* strxfrm_multiply */ | ||
| 9599 | 1, /* caseup_multiply */ | ||
| 9600 | 1, /* casedn_multiply */ | ||
| 9601 | 1, /* mbminlen */ | ||
| 9602 | 4, /* mbmaxlen */ | ||
| 9603 | 1, /* mbmaxlenlen */ | ||
| 9604 | 9, /* min_sort_char */ | ||
| 9605 | 0x10FFFF, /* max_sort_char */ | ||
| 9606 | ' ', /* pad char */ | ||
| 9607 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9608 | 1, /* levels_for_compare */ | ||
| 9609 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9610 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9611 | NO_PAD}; /* pad attribute */ | ||
| 9612 | |||
| 9613 | CHARSET_INFO my_charset_utf8mb4_de_pb_0900_ai_ci = { /* Collation 256: tailoring de_pb_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9614 | 256, | ||
| 9615 | 0, | ||
| 9616 | 0, /* number */ | ||
| 9617 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9618 | MY_UTF8MB4, /* csname */ | ||
| 9619 | MY_UTF8MB4 "_de_pb_0900_ai_ci", /* m_coll_name */ | ||
| 9620 | "", /* comment */ | ||
| 9621 | de_pb_cldr_30, /* tailoring */ | ||
| 9622 | nullptr, /* coll_param */ | ||
| 9623 | ctype_utf8, /* ctype */ | ||
| 9624 | nullptr, /* to_lower */ | ||
| 9625 | nullptr, /* to_upper */ | ||
| 9626 | nullptr, /* sort_order */ | ||
| 9627 | &my_uca_v900, /* uca */ | ||
| 9628 | nullptr, /* tab_to_uni */ | ||
| 9629 | nullptr, /* tab_from_uni */ | ||
| 9630 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9631 | nullptr, /* state_map */ | ||
| 9632 | nullptr, /* ident_map */ | ||
| 9633 | 0, /* strxfrm_multiply */ | ||
| 9634 | 1, /* caseup_multiply */ | ||
| 9635 | 1, /* casedn_multiply */ | ||
| 9636 | 1, /* mbminlen */ | ||
| 9637 | 4, /* mbmaxlen */ | ||
| 9638 | 1, /* mbmaxlenlen */ | ||
| 9639 | 9, /* min_sort_char */ | ||
| 9640 | 0x10FFFF, /* max_sort_char */ | ||
| 9641 | ' ', /* pad char */ | ||
| 9642 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9643 | 1, /* levels_for_compare */ | ||
| 9644 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9645 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9646 | NO_PAD}; /* pad attribute */ | ||
| 9647 | |||
| 9648 | CHARSET_INFO my_charset_utf8mb4_is_0900_ai_ci = { /* Collation 257: tailoring is_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9649 | 257, | ||
| 9650 | 0, | ||
| 9651 | 0, /* number */ | ||
| 9652 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9653 | MY_UTF8MB4, /* csname */ | ||
| 9654 | MY_UTF8MB4 "_is_0900_ai_ci", /* m_coll_name */ | ||
| 9655 | "", /* comment */ | ||
| 9656 | is_cldr_30, /* tailoring */ | ||
| 9657 | nullptr, /* coll_param */ | ||
| 9658 | ctype_utf8, /* ctype */ | ||
| 9659 | nullptr, /* to_lower */ | ||
| 9660 | nullptr, /* to_upper */ | ||
| 9661 | nullptr, /* sort_order */ | ||
| 9662 | &my_uca_v900, /* uca */ | ||
| 9663 | nullptr, /* tab_to_uni */ | ||
| 9664 | nullptr, /* tab_from_uni */ | ||
| 9665 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9666 | nullptr, /* state_map */ | ||
| 9667 | nullptr, /* ident_map */ | ||
| 9668 | 0, /* strxfrm_multiply */ | ||
| 9669 | 1, /* caseup_multiply */ | ||
| 9670 | 1, /* casedn_multiply */ | ||
| 9671 | 1, /* mbminlen */ | ||
| 9672 | 4, /* mbmaxlen */ | ||
| 9673 | 1, /* mbmaxlenlen */ | ||
| 9674 | 9, /* min_sort_char */ | ||
| 9675 | 0x10FFFF, /* max_sort_char */ | ||
| 9676 | ' ', /* pad char */ | ||
| 9677 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9678 | 1, /* levels_for_compare */ | ||
| 9679 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9680 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9681 | NO_PAD}; /* pad attribute */ | ||
| 9682 | |||
| 9683 | CHARSET_INFO my_charset_utf8mb4_lv_0900_ai_ci = { /* Collation 258: tailoring lv_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9684 | 258, | ||
| 9685 | 0, | ||
| 9686 | 0, /* number */ | ||
| 9687 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9688 | MY_UTF8MB4, /* csname */ | ||
| 9689 | MY_UTF8MB4 "_lv_0900_ai_ci", /* m_coll_name */ | ||
| 9690 | "", /* comment */ | ||
| 9691 | lv_cldr_30, /* tailoring */ | ||
| 9692 | nullptr, /* coll_param */ | ||
| 9693 | ctype_utf8, /* ctype */ | ||
| 9694 | nullptr, /* to_lower */ | ||
| 9695 | nullptr, /* to_upper */ | ||
| 9696 | nullptr, /* sort_order */ | ||
| 9697 | &my_uca_v900, /* uca */ | ||
| 9698 | nullptr, /* tab_to_uni */ | ||
| 9699 | nullptr, /* tab_from_uni */ | ||
| 9700 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9701 | nullptr, /* state_map */ | ||
| 9702 | nullptr, /* ident_map */ | ||
| 9703 | 0, /* strxfrm_multiply */ | ||
| 9704 | 1, /* caseup_multiply */ | ||
| 9705 | 1, /* casedn_multiply */ | ||
| 9706 | 1, /* mbminlen */ | ||
| 9707 | 4, /* mbmaxlen */ | ||
| 9708 | 1, /* mbmaxlenlen */ | ||
| 9709 | 9, /* min_sort_char */ | ||
| 9710 | 0x10FFFF, /* max_sort_char */ | ||
| 9711 | ' ', /* pad char */ | ||
| 9712 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9713 | 1, /* levels_for_compare */ | ||
| 9714 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9715 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9716 | NO_PAD}; /* pad attribute */ | ||
| 9717 | |||
| 9718 | CHARSET_INFO my_charset_utf8mb4_ro_0900_ai_ci = { /* Collation 259: tailoring ro_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9719 | 259, | ||
| 9720 | 0, | ||
| 9721 | 0, /* number */ | ||
| 9722 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9723 | MY_UTF8MB4, /* csname */ | ||
| 9724 | MY_UTF8MB4 "_ro_0900_ai_ci", /* m_coll_name */ | ||
| 9725 | "", /* comment */ | ||
| 9726 | ro_cldr_30, /* tailoring */ | ||
| 9727 | nullptr, /* coll_param */ | ||
| 9728 | ctype_utf8, /* ctype */ | ||
| 9729 | nullptr, /* to_lower */ | ||
| 9730 | nullptr, /* to_upper */ | ||
| 9731 | nullptr, /* sort_order */ | ||
| 9732 | &my_uca_v900, /* uca */ | ||
| 9733 | nullptr, /* tab_to_uni */ | ||
| 9734 | nullptr, /* tab_from_uni */ | ||
| 9735 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9736 | nullptr, /* state_map */ | ||
| 9737 | nullptr, /* ident_map */ | ||
| 9738 | 0, /* strxfrm_multiply */ | ||
| 9739 | 1, /* caseup_multiply */ | ||
| 9740 | 1, /* casedn_multiply */ | ||
| 9741 | 1, /* mbminlen */ | ||
| 9742 | 4, /* mbmaxlen */ | ||
| 9743 | 1, /* mbmaxlenlen */ | ||
| 9744 | 9, /* min_sort_char */ | ||
| 9745 | 0x10FFFF, /* max_sort_char */ | ||
| 9746 | ' ', /* pad char */ | ||
| 9747 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9748 | 1, /* levels_for_compare */ | ||
| 9749 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9750 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9751 | NO_PAD}; /* pad attribute */ | ||
| 9752 | |||
| 9753 | CHARSET_INFO my_charset_utf8mb4_sl_0900_ai_ci = { /* Collation 260: tailoring sl_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9754 | 260, | ||
| 9755 | 0, | ||
| 9756 | 0, /* number */ | ||
| 9757 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9758 | MY_UTF8MB4, /* csname */ | ||
| 9759 | MY_UTF8MB4 "_sl_0900_ai_ci", /* m_coll_name */ | ||
| 9760 | "", /* comment */ | ||
| 9761 | sl_cldr_30, /* tailoring */ | ||
| 9762 | nullptr, /* coll_param */ | ||
| 9763 | ctype_utf8, /* ctype */ | ||
| 9764 | nullptr, /* to_lower */ | ||
| 9765 | nullptr, /* to_upper */ | ||
| 9766 | nullptr, /* sort_order */ | ||
| 9767 | &my_uca_v900, /* uca */ | ||
| 9768 | nullptr, /* tab_to_uni */ | ||
| 9769 | nullptr, /* tab_from_uni */ | ||
| 9770 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9771 | nullptr, /* state_map */ | ||
| 9772 | nullptr, /* ident_map */ | ||
| 9773 | 0, /* strxfrm_multiply */ | ||
| 9774 | 1, /* caseup_multiply */ | ||
| 9775 | 1, /* casedn_multiply */ | ||
| 9776 | 1, /* mbminlen */ | ||
| 9777 | 4, /* mbmaxlen */ | ||
| 9778 | 1, /* mbmaxlenlen */ | ||
| 9779 | 9, /* min_sort_char */ | ||
| 9780 | 0x10FFFF, /* max_sort_char */ | ||
| 9781 | ' ', /* pad char */ | ||
| 9782 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9783 | 1, /* levels_for_compare */ | ||
| 9784 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9785 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9786 | NO_PAD}; /* pad attribute */ | ||
| 9787 | |||
| 9788 | CHARSET_INFO my_charset_utf8mb4_pl_0900_ai_ci = { /* Collation 261: tailoring pl_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9789 | 261, | ||
| 9790 | 0, | ||
| 9791 | 0, /* number */ | ||
| 9792 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9793 | MY_UTF8MB4, /* csname */ | ||
| 9794 | MY_UTF8MB4 "_pl_0900_ai_ci", /* m_coll_name */ | ||
| 9795 | "", /* comment */ | ||
| 9796 | pl_cldr_30, /* tailoring */ | ||
| 9797 | nullptr, /* coll_param */ | ||
| 9798 | ctype_utf8, /* ctype */ | ||
| 9799 | nullptr, /* to_lower */ | ||
| 9800 | nullptr, /* to_upper */ | ||
| 9801 | nullptr, /* sort_order */ | ||
| 9802 | &my_uca_v900, /* uca */ | ||
| 9803 | nullptr, /* tab_to_uni */ | ||
| 9804 | nullptr, /* tab_from_uni */ | ||
| 9805 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9806 | nullptr, /* state_map */ | ||
| 9807 | nullptr, /* ident_map */ | ||
| 9808 | 0, /* strxfrm_multiply */ | ||
| 9809 | 1, /* caseup_multiply */ | ||
| 9810 | 1, /* casedn_multiply */ | ||
| 9811 | 1, /* mbminlen */ | ||
| 9812 | 4, /* mbmaxlen */ | ||
| 9813 | 1, /* mbmaxlenlen */ | ||
| 9814 | 9, /* min_sort_char */ | ||
| 9815 | 0x10FFFF, /* max_sort_char */ | ||
| 9816 | ' ', /* pad char */ | ||
| 9817 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9818 | 1, /* levels_for_compare */ | ||
| 9819 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9820 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9821 | NO_PAD}; /* pad attribute */ | ||
| 9822 | |||
| 9823 | CHARSET_INFO my_charset_utf8mb4_et_0900_ai_ci = { /* Collation 262: tailoring et_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9824 | 262, | ||
| 9825 | 0, | ||
| 9826 | 0, /* number */ | ||
| 9827 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9828 | MY_UTF8MB4, /* csname */ | ||
| 9829 | MY_UTF8MB4 "_et_0900_ai_ci", /* m_coll_name */ | ||
| 9830 | "", /* comment */ | ||
| 9831 | et_cldr_30, /* tailoring */ | ||
| 9832 | nullptr, /* coll_param */ | ||
| 9833 | ctype_utf8, /* ctype */ | ||
| 9834 | nullptr, /* to_lower */ | ||
| 9835 | nullptr, /* to_upper */ | ||
| 9836 | nullptr, /* sort_order */ | ||
| 9837 | &my_uca_v900, /* uca */ | ||
| 9838 | nullptr, /* tab_to_uni */ | ||
| 9839 | nullptr, /* tab_from_uni */ | ||
| 9840 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9841 | nullptr, /* state_map */ | ||
| 9842 | nullptr, /* ident_map */ | ||
| 9843 | 0, /* strxfrm_multiply */ | ||
| 9844 | 1, /* caseup_multiply */ | ||
| 9845 | 1, /* casedn_multiply */ | ||
| 9846 | 1, /* mbminlen */ | ||
| 9847 | 4, /* mbmaxlen */ | ||
| 9848 | 1, /* mbmaxlenlen */ | ||
| 9849 | 9, /* min_sort_char */ | ||
| 9850 | 0x10FFFF, /* max_sort_char */ | ||
| 9851 | ' ', /* pad char */ | ||
| 9852 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9853 | 1, /* levels_for_compare */ | ||
| 9854 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9855 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9856 | NO_PAD}; /* pad attribute */ | ||
| 9857 | |||
| 9858 | CHARSET_INFO my_charset_utf8mb4_es_0900_ai_ci = { /* Collation 263: tailoring taken from `spanish` (not an *_cldr_30 symbol like most siblings), uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9859 | 263, | ||
| 9860 | 0, | ||
| 9861 | 0, /* number */ | ||
| 9862 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9863 | MY_UTF8MB4, /* csname */ | ||
| 9864 | MY_UTF8MB4 "_es_0900_ai_ci", /* m_coll_name */ | ||
| 9865 | "", /* comment */ | ||
| 9866 | spanish, /* tailoring */ | ||
| 9867 | nullptr, /* coll_param */ | ||
| 9868 | ctype_utf8, /* ctype */ | ||
| 9869 | nullptr, /* to_lower */ | ||
| 9870 | nullptr, /* to_upper */ | ||
| 9871 | nullptr, /* sort_order */ | ||
| 9872 | &my_uca_v900, /* uca */ | ||
| 9873 | nullptr, /* tab_to_uni */ | ||
| 9874 | nullptr, /* tab_from_uni */ | ||
| 9875 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9876 | nullptr, /* state_map */ | ||
| 9877 | nullptr, /* ident_map */ | ||
| 9878 | 0, /* strxfrm_multiply */ | ||
| 9879 | 1, /* caseup_multiply */ | ||
| 9880 | 1, /* casedn_multiply */ | ||
| 9881 | 1, /* mbminlen */ | ||
| 9882 | 4, /* mbmaxlen */ | ||
| 9883 | 1, /* mbmaxlenlen */ | ||
| 9884 | 9, /* min_sort_char */ | ||
| 9885 | 0x10FFFF, /* max_sort_char */ | ||
| 9886 | ' ', /* pad char */ | ||
| 9887 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9888 | 1, /* levels_for_compare */ | ||
| 9889 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9890 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9891 | NO_PAD}; /* pad attribute */ | ||
| 9892 | |||
| 9893 | CHARSET_INFO my_charset_utf8mb4_sv_0900_ai_ci = { /* Collation 264: tailoring sv_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9894 | 264, | ||
| 9895 | 0, | ||
| 9896 | 0, /* number */ | ||
| 9897 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9898 | MY_UTF8MB4, /* csname */ | ||
| 9899 | MY_UTF8MB4 "_sv_0900_ai_ci", /* m_coll_name */ | ||
| 9900 | "", /* comment */ | ||
| 9901 | sv_cldr_30, /* tailoring */ | ||
| 9902 | nullptr, /* coll_param */ | ||
| 9903 | ctype_utf8, /* ctype */ | ||
| 9904 | nullptr, /* to_lower */ | ||
| 9905 | nullptr, /* to_upper */ | ||
| 9906 | nullptr, /* sort_order */ | ||
| 9907 | &my_uca_v900, /* uca */ | ||
| 9908 | nullptr, /* tab_to_uni */ | ||
| 9909 | nullptr, /* tab_from_uni */ | ||
| 9910 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9911 | nullptr, /* state_map */ | ||
| 9912 | nullptr, /* ident_map */ | ||
| 9913 | 0, /* strxfrm_multiply */ | ||
| 9914 | 1, /* caseup_multiply */ | ||
| 9915 | 1, /* casedn_multiply */ | ||
| 9916 | 1, /* mbminlen */ | ||
| 9917 | 4, /* mbmaxlen */ | ||
| 9918 | 1, /* mbmaxlenlen */ | ||
| 9919 | 9, /* min_sort_char */ | ||
| 9920 | 0x10FFFF, /* max_sort_char */ | ||
| 9921 | ' ', /* pad char */ | ||
| 9922 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9923 | 1, /* levels_for_compare */ | ||
| 9924 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9925 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9926 | NO_PAD}; /* pad attribute */ | ||
| 9927 | |||
| 9928 | CHARSET_INFO my_charset_utf8mb4_tr_0900_ai_ci = { /* Collation 265: tailoring tr_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9929 | 265, | ||
| 9930 | 0, | ||
| 9931 | 0, /* number */ | ||
| 9932 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9933 | MY_UTF8MB4, /* csname */ | ||
| 9934 | MY_UTF8MB4 "_tr_0900_ai_ci", /* m_coll_name */ | ||
| 9935 | "", /* comment */ | ||
| 9936 | tr_cldr_30, /* tailoring */ | ||
| 9937 | nullptr, /* coll_param */ | ||
| 9938 | ctype_utf8, /* ctype */ | ||
| 9939 | nullptr, /* to_lower */ | ||
| 9940 | nullptr, /* to_upper */ | ||
| 9941 | nullptr, /* sort_order */ | ||
| 9942 | &my_uca_v900, /* uca */ | ||
| 9943 | nullptr, /* tab_to_uni */ | ||
| 9944 | nullptr, /* tab_from_uni */ | ||
| 9945 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9946 | nullptr, /* state_map */ | ||
| 9947 | nullptr, /* ident_map */ | ||
| 9948 | 0, /* strxfrm_multiply */ | ||
| 9949 | 1, /* caseup_multiply */ | ||
| 9950 | 1, /* casedn_multiply */ | ||
| 9951 | 1, /* mbminlen */ | ||
| 9952 | 4, /* mbmaxlen */ | ||
| 9953 | 1, /* mbmaxlenlen */ | ||
| 9954 | 9, /* min_sort_char */ | ||
| 9955 | 0x10FFFF, /* max_sort_char */ | ||
| 9956 | ' ', /* pad char */ | ||
| 9957 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9958 | 1, /* levels_for_compare */ | ||
| 9959 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9960 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9961 | NO_PAD}; /* pad attribute */ | ||
| 9962 | |||
| 9963 | CHARSET_INFO my_charset_utf8mb4_cs_0900_ai_ci = { /* Collation 266: tailoring cs_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9964 | 266, | ||
| 9965 | 0, | ||
| 9966 | 0, /* number */ | ||
| 9967 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 9968 | MY_UTF8MB4, /* csname */ | ||
| 9969 | MY_UTF8MB4 "_cs_0900_ai_ci", /* m_coll_name */ | ||
| 9970 | "", /* comment */ | ||
| 9971 | cs_cldr_30, /* tailoring */ | ||
| 9972 | nullptr, /* coll_param */ | ||
| 9973 | ctype_utf8, /* ctype */ | ||
| 9974 | nullptr, /* to_lower */ | ||
| 9975 | nullptr, /* to_upper */ | ||
| 9976 | nullptr, /* sort_order */ | ||
| 9977 | &my_uca_v900, /* uca */ | ||
| 9978 | nullptr, /* tab_to_uni */ | ||
| 9979 | nullptr, /* tab_from_uni */ | ||
| 9980 | &my_unicase_unicode900, /* caseinfo */ | ||
| 9981 | nullptr, /* state_map */ | ||
| 9982 | nullptr, /* ident_map */ | ||
| 9983 | 0, /* strxfrm_multiply */ | ||
| 9984 | 1, /* caseup_multiply */ | ||
| 9985 | 1, /* casedn_multiply */ | ||
| 9986 | 1, /* mbminlen */ | ||
| 9987 | 4, /* mbmaxlen */ | ||
| 9988 | 1, /* mbmaxlenlen */ | ||
| 9989 | 9, /* min_sort_char */ | ||
| 9990 | 0x10FFFF, /* max_sort_char */ | ||
| 9991 | ' ', /* pad char */ | ||
| 9992 | false, /* escape_with_backslash_is_dangerous */ | ||
| 9993 | 1, /* levels_for_compare */ | ||
| 9994 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 9995 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 9996 | NO_PAD}; /* pad attribute */ | ||
| 9997 | |||
| 9998 | CHARSET_INFO my_charset_utf8mb4_da_0900_ai_ci = { /* Collation 267: tailoring da_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 9999 | 267, | ||
| 10000 | 0, | ||
| 10001 | 0, /* number */ | ||
| 10002 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10003 | MY_UTF8MB4, /* csname */ | ||
| 10004 | MY_UTF8MB4 "_da_0900_ai_ci", /* m_coll_name */ | ||
| 10005 | "", /* comment */ | ||
| 10006 | da_cldr_30, /* tailoring */ | ||
| 10007 | nullptr, /* coll_param */ | ||
| 10008 | ctype_utf8, /* ctype */ | ||
| 10009 | nullptr, /* to_lower */ | ||
| 10010 | nullptr, /* to_upper */ | ||
| 10011 | nullptr, /* sort_order */ | ||
| 10012 | &my_uca_v900, /* uca */ | ||
| 10013 | nullptr, /* tab_to_uni */ | ||
| 10014 | nullptr, /* tab_from_uni */ | ||
| 10015 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10016 | nullptr, /* state_map */ | ||
| 10017 | nullptr, /* ident_map */ | ||
| 10018 | 0, /* strxfrm_multiply */ | ||
| 10019 | 1, /* caseup_multiply */ | ||
| 10020 | 1, /* casedn_multiply */ | ||
| 10021 | 1, /* mbminlen */ | ||
| 10022 | 4, /* mbmaxlen */ | ||
| 10023 | 1, /* mbmaxlenlen */ | ||
| 10024 | 9, /* min_sort_char */ | ||
| 10025 | 0x10FFFF, /* max_sort_char */ | ||
| 10026 | ' ', /* pad char */ | ||
| 10027 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10028 | 1, /* levels_for_compare */ | ||
| 10029 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10030 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10031 | NO_PAD}; /* pad attribute */ | ||
| 10032 | |||
| 10033 | CHARSET_INFO my_charset_utf8mb4_lt_0900_ai_ci = { /* Collation 268: tailoring lt_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10034 | 268, | ||
| 10035 | 0, | ||
| 10036 | 0, /* number */ | ||
| 10037 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10038 | MY_UTF8MB4, /* csname */ | ||
| 10039 | MY_UTF8MB4 "_lt_0900_ai_ci", /* m_coll_name */ | ||
| 10040 | "", /* comment */ | ||
| 10041 | lt_cldr_30, /* tailoring */ | ||
| 10042 | nullptr, /* coll_param */ | ||
| 10043 | ctype_utf8, /* ctype */ | ||
| 10044 | nullptr, /* to_lower */ | ||
| 10045 | nullptr, /* to_upper */ | ||
| 10046 | nullptr, /* sort_order */ | ||
| 10047 | &my_uca_v900, /* uca */ | ||
| 10048 | nullptr, /* tab_to_uni */ | ||
| 10049 | nullptr, /* tab_from_uni */ | ||
| 10050 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10051 | nullptr, /* state_map */ | ||
| 10052 | nullptr, /* ident_map */ | ||
| 10053 | 0, /* strxfrm_multiply */ | ||
| 10054 | 1, /* caseup_multiply */ | ||
| 10055 | 1, /* casedn_multiply */ | ||
| 10056 | 1, /* mbminlen */ | ||
| 10057 | 4, /* mbmaxlen */ | ||
| 10058 | 1, /* mbmaxlenlen */ | ||
| 10059 | 9, /* min_sort_char */ | ||
| 10060 | 0x10FFFF, /* max_sort_char */ | ||
| 10061 | ' ', /* pad char */ | ||
| 10062 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10063 | 1, /* levels_for_compare */ | ||
| 10064 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10065 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10066 | NO_PAD}; /* pad attribute */ | ||
| 10067 | |||
| 10068 | CHARSET_INFO my_charset_utf8mb4_sk_0900_ai_ci = { /* Collation 269: tailoring sk_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10069 | 269, | ||
| 10070 | 0, | ||
| 10071 | 0, /* number */ | ||
| 10072 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10073 | MY_UTF8MB4, /* csname */ | ||
| 10074 | MY_UTF8MB4 "_sk_0900_ai_ci", /* m_coll_name */ | ||
| 10075 | "", /* comment */ | ||
| 10076 | sk_cldr_30, /* tailoring */ | ||
| 10077 | nullptr, /* coll_param */ | ||
| 10078 | ctype_utf8, /* ctype */ | ||
| 10079 | nullptr, /* to_lower */ | ||
| 10080 | nullptr, /* to_upper */ | ||
| 10081 | nullptr, /* sort_order */ | ||
| 10082 | &my_uca_v900, /* uca */ | ||
| 10083 | nullptr, /* tab_to_uni */ | ||
| 10084 | nullptr, /* tab_from_uni */ | ||
| 10085 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10086 | nullptr, /* state_map */ | ||
| 10087 | nullptr, /* ident_map */ | ||
| 10088 | 0, /* strxfrm_multiply */ | ||
| 10089 | 1, /* caseup_multiply */ | ||
| 10090 | 1, /* casedn_multiply */ | ||
| 10091 | 1, /* mbminlen */ | ||
| 10092 | 4, /* mbmaxlen */ | ||
| 10093 | 1, /* mbmaxlenlen */ | ||
| 10094 | 9, /* min_sort_char */ | ||
| 10095 | 0x10FFFF, /* max_sort_char */ | ||
| 10096 | ' ', /* pad char */ | ||
| 10097 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10098 | 1, /* levels_for_compare */ | ||
| 10099 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10100 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10101 | NO_PAD}; /* pad attribute */ | ||
| 10102 | |||
| 10103 | CHARSET_INFO my_charset_utf8mb4_es_trad_0900_ai_ci = { /* Collation 270: tailoring es_trad_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10104 | 270, | ||
| 10105 | 0, | ||
| 10106 | 0, /* number */ | ||
| 10107 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10108 | MY_UTF8MB4, /* csname */ | ||
| 10109 | MY_UTF8MB4 "_es_trad_0900_ai_ci", /* m_coll_name */ | ||
| 10110 | "", /* comment */ | ||
| 10111 | es_trad_cldr_30, /* tailoring */ | ||
| 10112 | nullptr, /* coll_param */ | ||
| 10113 | ctype_utf8, /* ctype */ | ||
| 10114 | nullptr, /* to_lower */ | ||
| 10115 | nullptr, /* to_upper */ | ||
| 10116 | nullptr, /* sort_order */ | ||
| 10117 | &my_uca_v900, /* uca */ | ||
| 10118 | nullptr, /* tab_to_uni */ | ||
| 10119 | nullptr, /* tab_from_uni */ | ||
| 10120 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10121 | nullptr, /* state_map */ | ||
| 10122 | nullptr, /* ident_map */ | ||
| 10123 | 0, /* strxfrm_multiply */ | ||
| 10124 | 1, /* caseup_multiply */ | ||
| 10125 | 1, /* casedn_multiply */ | ||
| 10126 | 1, /* mbminlen */ | ||
| 10127 | 4, /* mbmaxlen */ | ||
| 10128 | 1, /* mbmaxlenlen */ | ||
| 10129 | 9, /* min_sort_char */ | ||
| 10130 | 0x10FFFF, /* max_sort_char */ | ||
| 10131 | ' ', /* pad char */ | ||
| 10132 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10133 | 1, /* levels_for_compare */ | ||
| 10134 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10135 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10136 | NO_PAD}; /* pad attribute */ | ||
| 10137 | |||
| 10138 | CHARSET_INFO my_charset_utf8mb4_la_0900_ai_ci = { /* Collation 271: tailoring taken from `roman` (not an *_cldr_30 symbol like most siblings), uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10139 | 271, | ||
| 10140 | 0, | ||
| 10141 | 0, /* number */ | ||
| 10142 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10143 | MY_UTF8MB4, /* csname */ | ||
| 10144 | MY_UTF8MB4 "_la_0900_ai_ci", /* m_coll_name */ | ||
| 10145 | "", /* comment */ | ||
| 10146 | roman, /* tailoring */ | ||
| 10147 | nullptr, /* coll_param */ | ||
| 10148 | ctype_utf8, /* ctype */ | ||
| 10149 | nullptr, /* to_lower */ | ||
| 10150 | nullptr, /* to_upper */ | ||
| 10151 | nullptr, /* sort_order */ | ||
| 10152 | &my_uca_v900, /* uca */ | ||
| 10153 | nullptr, /* tab_to_uni */ | ||
| 10154 | nullptr, /* tab_from_uni */ | ||
| 10155 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10156 | nullptr, /* state_map */ | ||
| 10157 | nullptr, /* ident_map */ | ||
| 10158 | 0, /* strxfrm_multiply */ | ||
| 10159 | 1, /* caseup_multiply */ | ||
| 10160 | 1, /* casedn_multiply */ | ||
| 10161 | 1, /* mbminlen */ | ||
| 10162 | 4, /* mbmaxlen */ | ||
| 10163 | 1, /* mbmaxlenlen */ | ||
| 10164 | 9, /* min_sort_char */ | ||
| 10165 | 0x10FFFF, /* max_sort_char */ | ||
| 10166 | ' ', /* pad char */ | ||
| 10167 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10168 | 1, /* levels_for_compare */ | ||
| 10169 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10170 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10171 | NO_PAD}; /* pad attribute */ | ||
| 10172 | |||
| 10173 | #if 0 | ||
| 10174 | CHARSET_INFO my_charset_utf8mb4_fa_0900_ai_ci= /* Compiled out by the surrounding #if 0 (reason not visible here). Collation 272: tailoring fa_cldr_30, coll_param fa_coll_param, uca my_uca_v900, 1 compare level, NO_PAD. Uses nullptr/false like the live definitions. */ | ||
| 10175 | { | ||
| 10176 | 272, 0, 0, /* number */ | ||
| 10177 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10178 | MY_UTF8MB4, /* csname */ | ||
| 10179 | MY_UTF8MB4 "_fa_0900_ai_ci", /* m_coll_name */ | ||
| 10180 | "", /* comment */ | ||
| 10181 | fa_cldr_30, /* tailoring */ | ||
| 10182 | &fa_coll_param, /* coll_param */ | ||
| 10183 | ctype_utf8, /* ctype */ | ||
| 10184 | nullptr, /* to_lower */ | ||
| 10185 | nullptr, /* to_upper */ | ||
| 10186 | nullptr, /* sort_order */ | ||
| 10187 | &my_uca_v900, /* uca */ | ||
| 10188 | nullptr, /* tab_to_uni */ | ||
| 10189 | nullptr, /* tab_from_uni */ | ||
| 10190 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10191 | nullptr, /* state_map */ | ||
| 10192 | nullptr, /* ident_map */ | ||
| 10193 | 0, /* strxfrm_multiply */ | ||
| 10194 | 1, /* caseup_multiply */ | ||
| 10195 | 1, /* casedn_multiply */ | ||
| 10196 | 1, /* mbminlen */ | ||
| 10197 | 4, /* mbmaxlen */ | ||
| 10198 | 1, /* mbmaxlenlen */ | ||
| 10199 | 9, /* min_sort_char */ | ||
| 10200 | 0x10FFFF, /* max_sort_char */ | ||
| 10201 | ' ', /* pad char */ | ||
| 10202 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10203 | 1, /* levels_for_compare */ | ||
| 10204 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10205 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10206 | NO_PAD /* pad attribute */ | ||
| 10207 | }; | ||
| 10208 | #endif | ||
| 10209 | |||
| 10210 | CHARSET_INFO my_charset_utf8mb4_eo_0900_ai_ci = { /* Collation 273: tailoring taken from `esperanto` (not an *_cldr_30 symbol like most siblings), uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10211 | 273, | ||
| 10212 | 0, | ||
| 10213 | 0, /* number */ | ||
| 10214 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10215 | MY_UTF8MB4, /* csname */ | ||
| 10216 | MY_UTF8MB4 "_eo_0900_ai_ci", /* m_coll_name */ | ||
| 10217 | "", /* comment */ | ||
| 10218 | esperanto, /* tailoring */ | ||
| 10219 | nullptr, /* coll_param */ | ||
| 10220 | ctype_utf8, /* ctype */ | ||
| 10221 | nullptr, /* to_lower */ | ||
| 10222 | nullptr, /* to_upper */ | ||
| 10223 | nullptr, /* sort_order */ | ||
| 10224 | &my_uca_v900, /* uca */ | ||
| 10225 | nullptr, /* tab_to_uni */ | ||
| 10226 | nullptr, /* tab_from_uni */ | ||
| 10227 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10228 | nullptr, /* state_map */ | ||
| 10229 | nullptr, /* ident_map */ | ||
| 10230 | 0, /* strxfrm_multiply */ | ||
| 10231 | 1, /* caseup_multiply */ | ||
| 10232 | 1, /* casedn_multiply */ | ||
| 10233 | 1, /* mbminlen */ | ||
| 10234 | 4, /* mbmaxlen */ | ||
| 10235 | 1, /* mbmaxlenlen */ | ||
| 10236 | 9, /* min_sort_char */ | ||
| 10237 | 0x10FFFF, /* max_sort_char */ | ||
| 10238 | ' ', /* pad char */ | ||
| 10239 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10240 | 1, /* levels_for_compare */ | ||
| 10241 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10242 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10243 | NO_PAD}; /* pad attribute */ | ||
| 10244 | |||
| 10245 | CHARSET_INFO my_charset_utf8mb4_hu_0900_ai_ci = { /* Collation 274: tailoring hu_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10246 | 274, | ||
| 10247 | 0, | ||
| 10248 | 0, /* number */ | ||
| 10249 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10250 | MY_UTF8MB4, /* csname */ | ||
| 10251 | MY_UTF8MB4 "_hu_0900_ai_ci", /* m_coll_name */ | ||
| 10252 | "", /* comment */ | ||
| 10253 | hu_cldr_30, /* tailoring */ | ||
| 10254 | nullptr, /* coll_param */ | ||
| 10255 | ctype_utf8, /* ctype */ | ||
| 10256 | nullptr, /* to_lower */ | ||
| 10257 | nullptr, /* to_upper */ | ||
| 10258 | nullptr, /* sort_order */ | ||
| 10259 | &my_uca_v900, /* uca */ | ||
| 10260 | nullptr, /* tab_to_uni */ | ||
| 10261 | nullptr, /* tab_from_uni */ | ||
| 10262 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10263 | nullptr, /* state_map */ | ||
| 10264 | nullptr, /* ident_map */ | ||
| 10265 | 0, /* strxfrm_multiply */ | ||
| 10266 | 1, /* caseup_multiply */ | ||
| 10267 | 1, /* casedn_multiply */ | ||
| 10268 | 1, /* mbminlen */ | ||
| 10269 | 4, /* mbmaxlen */ | ||
| 10270 | 1, /* mbmaxlenlen */ | ||
| 10271 | 9, /* min_sort_char */ | ||
| 10272 | 0x10FFFF, /* max_sort_char */ | ||
| 10273 | ' ', /* pad char */ | ||
| 10274 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10275 | 1, /* levels_for_compare */ | ||
| 10276 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10277 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10278 | NO_PAD}; /* pad attribute */ | ||
| 10279 | |||
| 10280 | CHARSET_INFO my_charset_utf8mb4_hr_0900_ai_ci = { /* Collation 275: tailoring hr_cldr_30 plus a non-null coll_param (hr_coll_param), uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10281 | 275, | ||
| 10282 | 0, | ||
| 10283 | 0, /* number */ | ||
| 10284 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10285 | MY_UTF8MB4, /* csname */ | ||
| 10286 | MY_UTF8MB4 "_hr_0900_ai_ci", /* m_coll_name */ | ||
| 10287 | "", /* comment */ | ||
| 10288 | hr_cldr_30, /* tailoring */ | ||
| 10289 | &hr_coll_param, /* coll_param */ | ||
| 10290 | ctype_utf8, /* ctype */ | ||
| 10291 | nullptr, /* to_lower */ | ||
| 10292 | nullptr, /* to_upper */ | ||
| 10293 | nullptr, /* sort_order */ | ||
| 10294 | &my_uca_v900, /* uca */ | ||
| 10295 | nullptr, /* tab_to_uni */ | ||
| 10296 | nullptr, /* tab_from_uni */ | ||
| 10297 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10298 | nullptr, /* state_map */ | ||
| 10299 | nullptr, /* ident_map */ | ||
| 10300 | 0, /* strxfrm_multiply */ | ||
| 10301 | 1, /* caseup_multiply */ | ||
| 10302 | 1, /* casedn_multiply */ | ||
| 10303 | 1, /* mbminlen */ | ||
| 10304 | 4, /* mbmaxlen */ | ||
| 10305 | 1, /* mbmaxlenlen */ | ||
| 10306 | 9, /* min_sort_char */ | ||
| 10307 | 0x10FFFF, /* max_sort_char */ | ||
| 10308 | ' ', /* pad char */ | ||
| 10309 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10310 | 1, /* levels_for_compare */ | ||
| 10311 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10312 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10313 | NO_PAD}; /* pad attribute */ | ||
| 10314 | |||
| 10315 | #if 0 | ||
| 10316 | CHARSET_INFO my_charset_utf8mb4_si_0900_ai_ci= /* Compiled out by the surrounding #if 0 (reason not visible here). Collation 276: tailoring si_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD. Uses nullptr/false like the live definitions. */ | ||
| 10317 | { | ||
| 10318 | 276, 0, 0, /* number */ | ||
| 10319 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10320 | MY_UTF8MB4, /* csname */ | ||
| 10321 | MY_UTF8MB4 "_si_0900_ai_ci", /* m_coll_name */ | ||
| 10322 | "", /* comment */ | ||
| 10323 | si_cldr_30, /* tailoring */ | ||
| 10324 | nullptr, /* coll_param */ | ||
| 10325 | ctype_utf8, /* ctype */ | ||
| 10326 | nullptr, /* to_lower */ | ||
| 10327 | nullptr, /* to_upper */ | ||
| 10328 | nullptr, /* sort_order */ | ||
| 10329 | &my_uca_v900, /* uca */ | ||
| 10330 | nullptr, /* tab_to_uni */ | ||
| 10331 | nullptr, /* tab_from_uni */ | ||
| 10332 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10333 | nullptr, /* state_map */ | ||
| 10334 | nullptr, /* ident_map */ | ||
| 10335 | 0, /* strxfrm_multiply */ | ||
| 10336 | 1, /* caseup_multiply */ | ||
| 10337 | 1, /* casedn_multiply */ | ||
| 10338 | 1, /* mbminlen */ | ||
| 10339 | 4, /* mbmaxlen */ | ||
| 10340 | 1, /* mbmaxlenlen */ | ||
| 10341 | 9, /* min_sort_char */ | ||
| 10342 | 0x10FFFF, /* max_sort_char */ | ||
| 10343 | ' ', /* pad char */ | ||
| 10344 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10345 | 1, /* levels_for_compare */ | ||
| 10346 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10347 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10348 | NO_PAD /* pad attribute */ | ||
| 10349 | }; | ||
| 10350 | #endif | ||
| 10351 | |||
| 10352 | CHARSET_INFO my_charset_utf8mb4_vi_0900_ai_ci = { /* Collation 277: tailoring vi_cldr_30, uca my_uca_v900, 1 compare level, NO_PAD */ | ||
| 10353 | 277, | ||
| 10354 | 0, | ||
| 10355 | 0, /* number */ | ||
| 10356 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 10357 | MY_UTF8MB4, /* csname */ | ||
| 10358 | MY_UTF8MB4 "_vi_0900_ai_ci", /* m_coll_name */ | ||
| 10359 | "", /* comment */ | ||
| 10360 | vi_cldr_30, /* tailoring */ | ||
| 10361 | nullptr, /* coll_param */ | ||
| 10362 | ctype_utf8, /* ctype */ | ||
| 10363 | nullptr, /* to_lower */ | ||
| 10364 | nullptr, /* to_upper */ | ||
| 10365 | nullptr, /* sort_order */ | ||
| 10366 | &my_uca_v900, /* uca */ | ||
| 10367 | nullptr, /* tab_to_uni */ | ||
| 10368 | nullptr, /* tab_from_uni */ | ||
| 10369 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10370 | nullptr, /* state_map */ | ||
| 10371 | nullptr, /* ident_map */ | ||
| 10372 | 0, /* strxfrm_multiply */ | ||
| 10373 | 1, /* caseup_multiply */ | ||
| 10374 | 1, /* casedn_multiply */ | ||
| 10375 | 1, /* mbminlen */ | ||
| 10376 | 4, /* mbmaxlen */ | ||
| 10377 | 1, /* mbmaxlenlen */ | ||
| 10378 | 9, /* min_sort_char */ | ||
| 10379 | 0x10FFFF, /* max_sort_char */ | ||
| 10380 | ' ', /* pad char */ | ||
| 10381 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10382 | 1, /* levels_for_compare */ | ||
| 10383 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10384 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10385 | NO_PAD}; /* pad attribute */ | ||
| 10386 | |||
| 10387 | CHARSET_INFO my_charset_utf8mb4_0900_as_cs = { /* Collation 278 (state adds MY_CS_CSSORT): no tailoring, uca my_uca_v900, 3 compare levels (the *_ai_ci siblings use 1), NO_PAD */ | ||
| 10388 | 278, | ||
| 10389 | 0, | ||
| 10390 | 0, /* number */ | ||
| 10391 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10392 | MY_UTF8MB4, /* csname */ | ||
| 10393 | MY_UTF8MB4 "_0900_as_cs", /* m_coll_name */ | ||
| 10394 | "", /* comment */ | ||
| 10395 | nullptr, /* tailoring */ | ||
| 10396 | nullptr, /* coll_param */ | ||
| 10397 | ctype_utf8, /* ctype */ | ||
| 10398 | nullptr, /* to_lower */ | ||
| 10399 | nullptr, /* to_upper */ | ||
| 10400 | nullptr, /* sort_order */ | ||
| 10401 | &my_uca_v900, /* uca */ | ||
| 10402 | nullptr, /* tab_to_uni */ | ||
| 10403 | nullptr, /* tab_from_uni */ | ||
| 10404 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10405 | nullptr, /* state_map */ | ||
| 10406 | nullptr, /* ident_map */ | ||
| 10407 | 0, /* strxfrm_multiply */ | ||
| 10408 | 1, /* caseup_multiply */ | ||
| 10409 | 1, /* casedn_multiply */ | ||
| 10410 | 1, /* mbminlen */ | ||
| 10411 | 4, /* mbmaxlen */ | ||
| 10412 | 1, /* mbmaxlenlen */ | ||
| 10413 | 9, /* min_sort_char */ | ||
| 10414 | 0x10FFFF, /* max_sort_char */ | ||
| 10415 | ' ', /* pad char */ | ||
| 10416 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10417 | 3, /* levels_for_compare */ | ||
| 10418 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10419 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10420 | NO_PAD}; /* pad attribute */ | ||
| 10421 | |||
| 10422 | CHARSET_INFO my_charset_utf8mb4_de_pb_0900_as_cs = { /* CHARSET_INFO object for the de_pb_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the de_pb_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10423 | 279, /* number */ | ||
| 10424 | 0, /* primary_number */ | ||
| 10425 | 0, /* binary_number */ | ||
| 10426 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10427 | MY_UTF8MB4, /* csname */ | ||
| 10428 | MY_UTF8MB4 "_de_pb_0900_as_cs", /* m_coll_name */ | ||
| 10429 | "", /* comment */ | ||
| 10430 | de_pb_cldr_30, /* tailoring */ | ||
| 10431 | nullptr, /* coll_param */ | ||
| 10432 | ctype_utf8, /* ctype */ | ||
| 10433 | nullptr, /* to_lower */ | ||
| 10434 | nullptr, /* to_upper */ | ||
| 10435 | nullptr, /* sort_order */ | ||
| 10436 | &my_uca_v900, /* uca */ | ||
| 10437 | nullptr, /* tab_to_uni */ | ||
| 10438 | nullptr, /* tab_from_uni */ | ||
| 10439 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10440 | nullptr, /* state_map */ | ||
| 10441 | nullptr, /* ident_map */ | ||
| 10442 | 0, /* strxfrm_multiply */ | ||
| 10443 | 1, /* caseup_multiply */ | ||
| 10444 | 1, /* casedn_multiply */ | ||
| 10445 | 1, /* mbminlen */ | ||
| 10446 | 4, /* mbmaxlen */ | ||
| 10447 | 1, /* mbmaxlenlen */ | ||
| 10448 | 9, /* min_sort_char */ | ||
| 10449 | 0x10FFFF, /* max_sort_char */ | ||
| 10450 | ' ', /* pad char */ | ||
| 10451 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10452 | 3, /* levels_for_compare */ | ||
| 10453 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10454 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10455 | NO_PAD}; /* pad attribute */ | ||
| 10456 | |||
| 10457 | CHARSET_INFO my_charset_utf8mb4_is_0900_as_cs = { /* CHARSET_INFO object for the is_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the is_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10458 | 280, /* number */ | ||
| 10459 | 0, /* primary_number */ | ||
| 10460 | 0, /* binary_number */ | ||
| 10461 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10462 | MY_UTF8MB4, /* csname */ | ||
| 10463 | MY_UTF8MB4 "_is_0900_as_cs", /* m_coll_name */ | ||
| 10464 | "", /* comment */ | ||
| 10465 | is_cldr_30, /* tailoring */ | ||
| 10466 | nullptr, /* coll_param */ | ||
| 10467 | ctype_utf8, /* ctype */ | ||
| 10468 | nullptr, /* to_lower */ | ||
| 10469 | nullptr, /* to_upper */ | ||
| 10470 | nullptr, /* sort_order */ | ||
| 10471 | &my_uca_v900, /* uca */ | ||
| 10472 | nullptr, /* tab_to_uni */ | ||
| 10473 | nullptr, /* tab_from_uni */ | ||
| 10474 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10475 | nullptr, /* state_map */ | ||
| 10476 | nullptr, /* ident_map */ | ||
| 10477 | 0, /* strxfrm_multiply */ | ||
| 10478 | 1, /* caseup_multiply */ | ||
| 10479 | 1, /* casedn_multiply */ | ||
| 10480 | 1, /* mbminlen */ | ||
| 10481 | 4, /* mbmaxlen */ | ||
| 10482 | 1, /* mbmaxlenlen */ | ||
| 10483 | 9, /* min_sort_char */ | ||
| 10484 | 0x10FFFF, /* max_sort_char */ | ||
| 10485 | ' ', /* pad char */ | ||
| 10486 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10487 | 3, /* levels_for_compare */ | ||
| 10488 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10489 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10490 | NO_PAD}; /* pad attribute */ | ||
| 10491 | |||
| 10492 | CHARSET_INFO my_charset_utf8mb4_lv_0900_as_cs = { /* CHARSET_INFO object for the lv_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the lv_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10493 | 281, /* number */ | ||
| 10494 | 0, /* primary_number */ | ||
| 10495 | 0, /* binary_number */ | ||
| 10496 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10497 | MY_UTF8MB4, /* csname */ | ||
| 10498 | MY_UTF8MB4 "_lv_0900_as_cs", /* m_coll_name */ | ||
| 10499 | "", /* comment */ | ||
| 10500 | lv_cldr_30, /* tailoring */ | ||
| 10501 | nullptr, /* coll_param */ | ||
| 10502 | ctype_utf8, /* ctype */ | ||
| 10503 | nullptr, /* to_lower */ | ||
| 10504 | nullptr, /* to_upper */ | ||
| 10505 | nullptr, /* sort_order */ | ||
| 10506 | &my_uca_v900, /* uca */ | ||
| 10507 | nullptr, /* tab_to_uni */ | ||
| 10508 | nullptr, /* tab_from_uni */ | ||
| 10509 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10510 | nullptr, /* state_map */ | ||
| 10511 | nullptr, /* ident_map */ | ||
| 10512 | 0, /* strxfrm_multiply */ | ||
| 10513 | 1, /* caseup_multiply */ | ||
| 10514 | 1, /* casedn_multiply */ | ||
| 10515 | 1, /* mbminlen */ | ||
| 10516 | 4, /* mbmaxlen */ | ||
| 10517 | 1, /* mbmaxlenlen */ | ||
| 10518 | 9, /* min_sort_char */ | ||
| 10519 | 0x10FFFF, /* max_sort_char */ | ||
| 10520 | ' ', /* pad char */ | ||
| 10521 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10522 | 3, /* levels_for_compare */ | ||
| 10523 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10524 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10525 | NO_PAD}; /* pad attribute */ | ||
| 10526 | |||
| 10527 | CHARSET_INFO my_charset_utf8mb4_ro_0900_as_cs = { /* CHARSET_INFO object for the ro_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the ro_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10528 | 282, /* number */ | ||
| 10529 | 0, /* primary_number */ | ||
| 10530 | 0, /* binary_number */ | ||
| 10531 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10532 | MY_UTF8MB4, /* csname */ | ||
| 10533 | MY_UTF8MB4 "_ro_0900_as_cs", /* m_coll_name */ | ||
| 10534 | "", /* comment */ | ||
| 10535 | ro_cldr_30, /* tailoring */ | ||
| 10536 | nullptr, /* coll_param */ | ||
| 10537 | ctype_utf8, /* ctype */ | ||
| 10538 | nullptr, /* to_lower */ | ||
| 10539 | nullptr, /* to_upper */ | ||
| 10540 | nullptr, /* sort_order */ | ||
| 10541 | &my_uca_v900, /* uca */ | ||
| 10542 | nullptr, /* tab_to_uni */ | ||
| 10543 | nullptr, /* tab_from_uni */ | ||
| 10544 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10545 | nullptr, /* state_map */ | ||
| 10546 | nullptr, /* ident_map */ | ||
| 10547 | 0, /* strxfrm_multiply */ | ||
| 10548 | 1, /* caseup_multiply */ | ||
| 10549 | 1, /* casedn_multiply */ | ||
| 10550 | 1, /* mbminlen */ | ||
| 10551 | 4, /* mbmaxlen */ | ||
| 10552 | 1, /* mbmaxlenlen */ | ||
| 10553 | 9, /* min_sort_char */ | ||
| 10554 | 0x10FFFF, /* max_sort_char */ | ||
| 10555 | ' ', /* pad char */ | ||
| 10556 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10557 | 3, /* levels_for_compare */ | ||
| 10558 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10559 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10560 | NO_PAD}; /* pad attribute */ | ||
| 10561 | |||
| 10562 | CHARSET_INFO my_charset_utf8mb4_sl_0900_as_cs = { /* CHARSET_INFO object for the sl_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the sl_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10563 | 283, /* number */ | ||
| 10564 | 0, /* primary_number */ | ||
| 10565 | 0, /* binary_number */ | ||
| 10566 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10567 | MY_UTF8MB4, /* csname */ | ||
| 10568 | MY_UTF8MB4 "_sl_0900_as_cs", /* m_coll_name */ | ||
| 10569 | "", /* comment */ | ||
| 10570 | sl_cldr_30, /* tailoring */ | ||
| 10571 | nullptr, /* coll_param */ | ||
| 10572 | ctype_utf8, /* ctype */ | ||
| 10573 | nullptr, /* to_lower */ | ||
| 10574 | nullptr, /* to_upper */ | ||
| 10575 | nullptr, /* sort_order */ | ||
| 10576 | &my_uca_v900, /* uca */ | ||
| 10577 | nullptr, /* tab_to_uni */ | ||
| 10578 | nullptr, /* tab_from_uni */ | ||
| 10579 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10580 | nullptr, /* state_map */ | ||
| 10581 | nullptr, /* ident_map */ | ||
| 10582 | 0, /* strxfrm_multiply */ | ||
| 10583 | 1, /* caseup_multiply */ | ||
| 10584 | 1, /* casedn_multiply */ | ||
| 10585 | 1, /* mbminlen */ | ||
| 10586 | 4, /* mbmaxlen */ | ||
| 10587 | 1, /* mbmaxlenlen */ | ||
| 10588 | 9, /* min_sort_char */ | ||
| 10589 | 0x10FFFF, /* max_sort_char */ | ||
| 10590 | ' ', /* pad char */ | ||
| 10591 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10592 | 3, /* levels_for_compare */ | ||
| 10593 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10594 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10595 | NO_PAD}; /* pad attribute */ | ||
| 10596 | |||
| 10597 | CHARSET_INFO my_charset_utf8mb4_pl_0900_as_cs = { /* CHARSET_INFO object for the pl_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the pl_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10598 | 284, /* number */ | ||
| 10599 | 0, /* primary_number */ | ||
| 10600 | 0, /* binary_number */ | ||
| 10601 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10602 | MY_UTF8MB4, /* csname */ | ||
| 10603 | MY_UTF8MB4 "_pl_0900_as_cs", /* m_coll_name */ | ||
| 10604 | "", /* comment */ | ||
| 10605 | pl_cldr_30, /* tailoring */ | ||
| 10606 | nullptr, /* coll_param */ | ||
| 10607 | ctype_utf8, /* ctype */ | ||
| 10608 | nullptr, /* to_lower */ | ||
| 10609 | nullptr, /* to_upper */ | ||
| 10610 | nullptr, /* sort_order */ | ||
| 10611 | &my_uca_v900, /* uca */ | ||
| 10612 | nullptr, /* tab_to_uni */ | ||
| 10613 | nullptr, /* tab_from_uni */ | ||
| 10614 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10615 | nullptr, /* state_map */ | ||
| 10616 | nullptr, /* ident_map */ | ||
| 10617 | 0, /* strxfrm_multiply */ | ||
| 10618 | 1, /* caseup_multiply */ | ||
| 10619 | 1, /* casedn_multiply */ | ||
| 10620 | 1, /* mbminlen */ | ||
| 10621 | 4, /* mbmaxlen */ | ||
| 10622 | 1, /* mbmaxlenlen */ | ||
| 10623 | 9, /* min_sort_char */ | ||
| 10624 | 0x10FFFF, /* max_sort_char */ | ||
| 10625 | ' ', /* pad char */ | ||
| 10626 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10627 | 3, /* levels_for_compare */ | ||
| 10628 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10629 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10630 | NO_PAD}; /* pad attribute */ | ||
| 10631 | |||
| 10632 | CHARSET_INFO my_charset_utf8mb4_et_0900_as_cs = { /* CHARSET_INFO object for the et_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the et_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10633 | 285, /* number */ | ||
| 10634 | 0, /* primary_number */ | ||
| 10635 | 0, /* binary_number */ | ||
| 10636 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10637 | MY_UTF8MB4, /* csname */ | ||
| 10638 | MY_UTF8MB4 "_et_0900_as_cs", /* m_coll_name */ | ||
| 10639 | "", /* comment */ | ||
| 10640 | et_cldr_30, /* tailoring */ | ||
| 10641 | nullptr, /* coll_param */ | ||
| 10642 | ctype_utf8, /* ctype */ | ||
| 10643 | nullptr, /* to_lower */ | ||
| 10644 | nullptr, /* to_upper */ | ||
| 10645 | nullptr, /* sort_order */ | ||
| 10646 | &my_uca_v900, /* uca */ | ||
| 10647 | nullptr, /* tab_to_uni */ | ||
| 10648 | nullptr, /* tab_from_uni */ | ||
| 10649 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10650 | nullptr, /* state_map */ | ||
| 10651 | nullptr, /* ident_map */ | ||
| 10652 | 0, /* strxfrm_multiply */ | ||
| 10653 | 1, /* caseup_multiply */ | ||
| 10654 | 1, /* casedn_multiply */ | ||
| 10655 | 1, /* mbminlen */ | ||
| 10656 | 4, /* mbmaxlen */ | ||
| 10657 | 1, /* mbmaxlenlen */ | ||
| 10658 | 9, /* min_sort_char */ | ||
| 10659 | 0x10FFFF, /* max_sort_char */ | ||
| 10660 | ' ', /* pad char */ | ||
| 10661 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10662 | 3, /* levels_for_compare */ | ||
| 10663 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10664 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10665 | NO_PAD}; /* pad attribute */ | ||
| 10666 | |||
| 10667 | CHARSET_INFO my_charset_utf8mb4_es_0900_as_cs = { /* CHARSET_INFO object for the es_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the tailoring named spanish, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10668 | 286, /* number */ | ||
| 10669 | 0, /* primary_number */ | ||
| 10670 | 0, /* binary_number */ | ||
| 10671 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10672 | MY_UTF8MB4, /* csname */ | ||
| 10673 | MY_UTF8MB4 "_es_0900_as_cs", /* m_coll_name */ | ||
| 10674 | "", /* comment */ | ||
| 10675 | spanish, /* tailoring */ | ||
| 10676 | nullptr, /* coll_param */ | ||
| 10677 | ctype_utf8, /* ctype */ | ||
| 10678 | nullptr, /* to_lower */ | ||
| 10679 | nullptr, /* to_upper */ | ||
| 10680 | nullptr, /* sort_order */ | ||
| 10681 | &my_uca_v900, /* uca */ | ||
| 10682 | nullptr, /* tab_to_uni */ | ||
| 10683 | nullptr, /* tab_from_uni */ | ||
| 10684 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10685 | nullptr, /* state_map */ | ||
| 10686 | nullptr, /* ident_map */ | ||
| 10687 | 0, /* strxfrm_multiply */ | ||
| 10688 | 1, /* caseup_multiply */ | ||
| 10689 | 1, /* casedn_multiply */ | ||
| 10690 | 1, /* mbminlen */ | ||
| 10691 | 4, /* mbmaxlen */ | ||
| 10692 | 1, /* mbmaxlenlen */ | ||
| 10693 | 9, /* min_sort_char */ | ||
| 10694 | 0x10FFFF, /* max_sort_char */ | ||
| 10695 | ' ', /* pad char */ | ||
| 10696 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10697 | 3, /* levels_for_compare */ | ||
| 10698 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10699 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10700 | NO_PAD}; /* pad attribute */ | ||
| 10701 | |||
| 10702 | CHARSET_INFO my_charset_utf8mb4_sv_0900_as_cs = { /* CHARSET_INFO object for the sv_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the sv_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10703 | 287, /* number */ | ||
| 10704 | 0, /* primary_number */ | ||
| 10705 | 0, /* binary_number */ | ||
| 10706 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10707 | MY_UTF8MB4, /* csname */ | ||
| 10708 | MY_UTF8MB4 "_sv_0900_as_cs", /* m_coll_name */ | ||
| 10709 | "", /* comment */ | ||
| 10710 | sv_cldr_30, /* tailoring */ | ||
| 10711 | nullptr, /* coll_param */ | ||
| 10712 | ctype_utf8, /* ctype */ | ||
| 10713 | nullptr, /* to_lower */ | ||
| 10714 | nullptr, /* to_upper */ | ||
| 10715 | nullptr, /* sort_order */ | ||
| 10716 | &my_uca_v900, /* uca */ | ||
| 10717 | nullptr, /* tab_to_uni */ | ||
| 10718 | nullptr, /* tab_from_uni */ | ||
| 10719 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10720 | nullptr, /* state_map */ | ||
| 10721 | nullptr, /* ident_map */ | ||
| 10722 | 0, /* strxfrm_multiply */ | ||
| 10723 | 1, /* caseup_multiply */ | ||
| 10724 | 1, /* casedn_multiply */ | ||
| 10725 | 1, /* mbminlen */ | ||
| 10726 | 4, /* mbmaxlen */ | ||
| 10727 | 1, /* mbmaxlenlen */ | ||
| 10728 | 9, /* min_sort_char */ | ||
| 10729 | 0x10FFFF, /* max_sort_char */ | ||
| 10730 | ' ', /* pad char */ | ||
| 10731 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10732 | 3, /* levels_for_compare */ | ||
| 10733 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10734 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10735 | NO_PAD}; /* pad attribute */ | ||
| 10736 | |||
| 10737 | CHARSET_INFO my_charset_utf8mb4_tr_0900_as_cs = { /* CHARSET_INFO object for the tr_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the tr_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10738 | 288, /* number */ | ||
| 10739 | 0, /* primary_number */ | ||
| 10740 | 0, /* binary_number */ | ||
| 10741 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10742 | MY_UTF8MB4, /* csname */ | ||
| 10743 | MY_UTF8MB4 "_tr_0900_as_cs", /* m_coll_name */ | ||
| 10744 | "", /* comment */ | ||
| 10745 | tr_cldr_30, /* tailoring */ | ||
| 10746 | nullptr, /* coll_param */ | ||
| 10747 | ctype_utf8, /* ctype */ | ||
| 10748 | nullptr, /* to_lower */ | ||
| 10749 | nullptr, /* to_upper */ | ||
| 10750 | nullptr, /* sort_order */ | ||
| 10751 | &my_uca_v900, /* uca */ | ||
| 10752 | nullptr, /* tab_to_uni */ | ||
| 10753 | nullptr, /* tab_from_uni */ | ||
| 10754 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10755 | nullptr, /* state_map */ | ||
| 10756 | nullptr, /* ident_map */ | ||
| 10757 | 0, /* strxfrm_multiply */ | ||
| 10758 | 1, /* caseup_multiply */ | ||
| 10759 | 1, /* casedn_multiply */ | ||
| 10760 | 1, /* mbminlen */ | ||
| 10761 | 4, /* mbmaxlen */ | ||
| 10762 | 1, /* mbmaxlenlen */ | ||
| 10763 | 9, /* min_sort_char */ | ||
| 10764 | 0x10FFFF, /* max_sort_char */ | ||
| 10765 | ' ', /* pad char */ | ||
| 10766 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10767 | 3, /* levels_for_compare */ | ||
| 10768 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10769 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10770 | NO_PAD}; /* pad attribute */ | ||
| 10771 | |||
| 10772 | CHARSET_INFO my_charset_utf8mb4_cs_0900_as_cs = { /* CHARSET_INFO object for the cs_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the cs_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10773 | 289, /* number */ | ||
| 10774 | 0, /* primary_number */ | ||
| 10775 | 0, /* binary_number */ | ||
| 10776 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10777 | MY_UTF8MB4, /* csname */ | ||
| 10778 | MY_UTF8MB4 "_cs_0900_as_cs", /* m_coll_name */ | ||
| 10779 | "", /* comment */ | ||
| 10780 | cs_cldr_30, /* tailoring */ | ||
| 10781 | nullptr, /* coll_param */ | ||
| 10782 | ctype_utf8, /* ctype */ | ||
| 10783 | nullptr, /* to_lower */ | ||
| 10784 | nullptr, /* to_upper */ | ||
| 10785 | nullptr, /* sort_order */ | ||
| 10786 | &my_uca_v900, /* uca */ | ||
| 10787 | nullptr, /* tab_to_uni */ | ||
| 10788 | nullptr, /* tab_from_uni */ | ||
| 10789 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10790 | nullptr, /* state_map */ | ||
| 10791 | nullptr, /* ident_map */ | ||
| 10792 | 0, /* strxfrm_multiply */ | ||
| 10793 | 1, /* caseup_multiply */ | ||
| 10794 | 1, /* casedn_multiply */ | ||
| 10795 | 1, /* mbminlen */ | ||
| 10796 | 4, /* mbmaxlen */ | ||
| 10797 | 1, /* mbmaxlenlen */ | ||
| 10798 | 9, /* min_sort_char */ | ||
| 10799 | 0x10FFFF, /* max_sort_char */ | ||
| 10800 | ' ', /* pad char */ | ||
| 10801 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10802 | 3, /* levels_for_compare */ | ||
| 10803 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10804 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10805 | NO_PAD}; /* pad attribute */ | ||
| 10806 | |||
| 10807 | CHARSET_INFO my_charset_utf8mb4_da_0900_as_cs = { /* CHARSET_INFO object for the da_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the da_cldr_30 tailoring and da_coll_param, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10808 | 290, /* number */ | ||
| 10809 | 0, /* primary_number */ | ||
| 10810 | 0, /* binary_number */ | ||
| 10811 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10812 | MY_UTF8MB4, /* csname */ | ||
| 10813 | MY_UTF8MB4 "_da_0900_as_cs", /* m_coll_name */ | ||
| 10814 | "", /* comment */ | ||
| 10815 | da_cldr_30, /* tailoring */ | ||
| 10816 | &da_coll_param, /* coll_param: extra parameters defined elsewhere in this file */ | ||
| 10817 | ctype_utf8, /* ctype */ | ||
| 10818 | nullptr, /* to_lower */ | ||
| 10819 | nullptr, /* to_upper */ | ||
| 10820 | nullptr, /* sort_order */ | ||
| 10821 | &my_uca_v900, /* uca */ | ||
| 10822 | nullptr, /* tab_to_uni */ | ||
| 10823 | nullptr, /* tab_from_uni */ | ||
| 10824 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10825 | nullptr, /* state_map */ | ||
| 10826 | nullptr, /* ident_map */ | ||
| 10827 | 0, /* strxfrm_multiply */ | ||
| 10828 | 1, /* caseup_multiply */ | ||
| 10829 | 1, /* casedn_multiply */ | ||
| 10830 | 1, /* mbminlen */ | ||
| 10831 | 4, /* mbmaxlen */ | ||
| 10832 | 1, /* mbmaxlenlen */ | ||
| 10833 | 9, /* min_sort_char */ | ||
| 10834 | 0x10FFFF, /* max_sort_char */ | ||
| 10835 | ' ', /* pad char */ | ||
| 10836 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10837 | 3, /* levels_for_compare */ | ||
| 10838 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10839 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10840 | NO_PAD}; /* pad attribute */ | ||
| 10841 | |||
| 10842 | CHARSET_INFO my_charset_utf8mb4_lt_0900_as_cs = { /* CHARSET_INFO object for the lt_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the lt_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10843 | 291, /* number */ | ||
| 10844 | 0, /* primary_number */ | ||
| 10845 | 0, /* binary_number */ | ||
| 10846 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10847 | MY_UTF8MB4, /* csname */ | ||
| 10848 | MY_UTF8MB4 "_lt_0900_as_cs", /* m_coll_name */ | ||
| 10849 | "", /* comment */ | ||
| 10850 | lt_cldr_30, /* tailoring */ | ||
| 10851 | nullptr, /* coll_param */ | ||
| 10852 | ctype_utf8, /* ctype */ | ||
| 10853 | nullptr, /* to_lower */ | ||
| 10854 | nullptr, /* to_upper */ | ||
| 10855 | nullptr, /* sort_order */ | ||
| 10856 | &my_uca_v900, /* uca */ | ||
| 10857 | nullptr, /* tab_to_uni */ | ||
| 10858 | nullptr, /* tab_from_uni */ | ||
| 10859 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10860 | nullptr, /* state_map */ | ||
| 10861 | nullptr, /* ident_map */ | ||
| 10862 | 0, /* strxfrm_multiply */ | ||
| 10863 | 1, /* caseup_multiply */ | ||
| 10864 | 1, /* casedn_multiply */ | ||
| 10865 | 1, /* mbminlen */ | ||
| 10866 | 4, /* mbmaxlen */ | ||
| 10867 | 1, /* mbmaxlenlen */ | ||
| 10868 | 9, /* min_sort_char */ | ||
| 10869 | 0x10FFFF, /* max_sort_char */ | ||
| 10870 | ' ', /* pad char */ | ||
| 10871 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10872 | 3, /* levels_for_compare */ | ||
| 10873 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10874 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10875 | NO_PAD}; /* pad attribute */ | ||
| 10876 | |||
| 10877 | CHARSET_INFO my_charset_utf8mb4_sk_0900_as_cs = { /* CHARSET_INFO object for the sk_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the sk_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10878 | 292, /* number */ | ||
| 10879 | 0, /* primary_number */ | ||
| 10880 | 0, /* binary_number */ | ||
| 10881 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10882 | MY_UTF8MB4, /* csname */ | ||
| 10883 | MY_UTF8MB4 "_sk_0900_as_cs", /* m_coll_name */ | ||
| 10884 | "", /* comment */ | ||
| 10885 | sk_cldr_30, /* tailoring */ | ||
| 10886 | nullptr, /* coll_param */ | ||
| 10887 | ctype_utf8, /* ctype */ | ||
| 10888 | nullptr, /* to_lower */ | ||
| 10889 | nullptr, /* to_upper */ | ||
| 10890 | nullptr, /* sort_order */ | ||
| 10891 | &my_uca_v900, /* uca */ | ||
| 10892 | nullptr, /* tab_to_uni */ | ||
| 10893 | nullptr, /* tab_from_uni */ | ||
| 10894 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10895 | nullptr, /* state_map */ | ||
| 10896 | nullptr, /* ident_map */ | ||
| 10897 | 0, /* strxfrm_multiply */ | ||
| 10898 | 1, /* caseup_multiply */ | ||
| 10899 | 1, /* casedn_multiply */ | ||
| 10900 | 1, /* mbminlen */ | ||
| 10901 | 4, /* mbmaxlen */ | ||
| 10902 | 1, /* mbmaxlenlen */ | ||
| 10903 | 9, /* min_sort_char */ | ||
| 10904 | 0x10FFFF, /* max_sort_char */ | ||
| 10905 | ' ', /* pad char */ | ||
| 10906 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10907 | 3, /* levels_for_compare */ | ||
| 10908 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10909 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10910 | NO_PAD}; /* pad attribute */ | ||
| 10911 | |||
| 10912 | CHARSET_INFO my_charset_utf8mb4_es_trad_0900_as_cs = { /* CHARSET_INFO object for the es_trad_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the es_trad_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10913 | 293, /* number */ | ||
| 10914 | 0, /* primary_number */ | ||
| 10915 | 0, /* binary_number */ | ||
| 10916 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10917 | MY_UTF8MB4, /* csname */ | ||
| 10918 | MY_UTF8MB4 "_es_trad_0900_as_cs", /* m_coll_name */ | ||
| 10919 | "", /* comment */ | ||
| 10920 | es_trad_cldr_30, /* tailoring */ | ||
| 10921 | nullptr, /* coll_param */ | ||
| 10922 | ctype_utf8, /* ctype */ | ||
| 10923 | nullptr, /* to_lower */ | ||
| 10924 | nullptr, /* to_upper */ | ||
| 10925 | nullptr, /* sort_order */ | ||
| 10926 | &my_uca_v900, /* uca */ | ||
| 10927 | nullptr, /* tab_to_uni */ | ||
| 10928 | nullptr, /* tab_from_uni */ | ||
| 10929 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10930 | nullptr, /* state_map */ | ||
| 10931 | nullptr, /* ident_map */ | ||
| 10932 | 0, /* strxfrm_multiply */ | ||
| 10933 | 1, /* caseup_multiply */ | ||
| 10934 | 1, /* casedn_multiply */ | ||
| 10935 | 1, /* mbminlen */ | ||
| 10936 | 4, /* mbmaxlen */ | ||
| 10937 | 1, /* mbmaxlenlen */ | ||
| 10938 | 9, /* min_sort_char */ | ||
| 10939 | 0x10FFFF, /* max_sort_char */ | ||
| 10940 | ' ', /* pad char */ | ||
| 10941 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10942 | 3, /* levels_for_compare */ | ||
| 10943 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10944 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10945 | NO_PAD}; /* pad attribute */ | ||
| 10946 | |||
| 10947 | CHARSET_INFO my_charset_utf8mb4_la_0900_as_cs = { /* CHARSET_INFO object for the la_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the tailoring named roman, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 10948 | 294, /* number */ | ||
| 10949 | 0, /* primary_number */ | ||
| 10950 | 0, /* binary_number */ | ||
| 10951 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 10952 | MY_UTF8MB4, /* csname */ | ||
| 10953 | MY_UTF8MB4 "_la_0900_as_cs", /* m_coll_name */ | ||
| 10954 | "", /* comment */ | ||
| 10955 | roman, /* tailoring */ | ||
| 10956 | nullptr, /* coll_param */ | ||
| 10957 | ctype_utf8, /* ctype */ | ||
| 10958 | nullptr, /* to_lower */ | ||
| 10959 | nullptr, /* to_upper */ | ||
| 10960 | nullptr, /* sort_order */ | ||
| 10961 | &my_uca_v900, /* uca */ | ||
| 10962 | nullptr, /* tab_to_uni */ | ||
| 10963 | nullptr, /* tab_from_uni */ | ||
| 10964 | &my_unicase_unicode900, /* caseinfo */ | ||
| 10965 | nullptr, /* state_map */ | ||
| 10966 | nullptr, /* ident_map */ | ||
| 10967 | 0, /* strxfrm_multiply */ | ||
| 10968 | 1, /* caseup_multiply */ | ||
| 10969 | 1, /* casedn_multiply */ | ||
| 10970 | 1, /* mbminlen */ | ||
| 10971 | 4, /* mbmaxlen */ | ||
| 10972 | 1, /* mbmaxlenlen */ | ||
| 10973 | 9, /* min_sort_char */ | ||
| 10974 | 0x10FFFF, /* max_sort_char */ | ||
| 10975 | ' ', /* pad char */ | ||
| 10976 | false, /* escape_with_backslash_is_dangerous */ | ||
| 10977 | 3, /* levels_for_compare */ | ||
| 10978 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 10979 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 10980 | NO_PAD}; /* pad attribute */ | ||
| 10981 | |||
| 10982 | #if 0 /* Compiled out: the fa_0900_as_cs definition below is not built. It still uses NULL and 0 where the active definitions in this file use nullptr and false. NOTE(review): presumably kept as a placeholder for a collation that is not enabled -- confirm before re-enabling. */ | ||
| 10983 | CHARSET_INFO my_charset_utf8mb4_fa_0900_as_cs= | ||
| 10984 | { | ||
| 10985 | 295, 0, 0, /* number (label covers all three leading values) */ | ||
| 10986 | MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */ | ||
| 10987 | MY_UTF8MB4, /* csname */ | ||
| 10988 | MY_UTF8MB4 "_fa_0900_as_cs",/* m_coll_name */ | ||
| 10989 | "", /* comment */ | ||
| 10990 | fa_cldr_30, /* tailoring */ | ||
| 10991 | &fa_coll_param, /* coll_param */ | ||
| 10992 | ctype_utf8, /* ctype */ | ||
| 10993 | NULL, /* to_lower */ | ||
| 10994 | NULL, /* to_upper */ | ||
| 10995 | NULL, /* sort_order */ | ||
| 10996 | &my_uca_v900, /* uca */ | ||
| 10997 | NULL, /* tab_to_uni */ | ||
| 10998 | NULL, /* tab_from_uni */ | ||
| 10999 | &my_unicase_unicode900,/* caseinfo */ | ||
| 11000 | NULL, /* state_map */ | ||
| 11001 | NULL, /* ident_map */ | ||
| 11002 | 0, /* strxfrm_multiply */ | ||
| 11003 | 1, /* caseup_multiply */ | ||
| 11004 | 1, /* casedn_multiply */ | ||
| 11005 | 1, /* mbminlen */ | ||
| 11006 | 4, /* mbmaxlen */ | ||
| 11007 | 1, /* mbmaxlenlen */ | ||
| 11008 | 9, /* min_sort_char */ | ||
| 11009 | 0x10FFFF, /* max_sort_char */ | ||
| 11010 | ' ', /* pad char */ | ||
| 11011 | 0, /* escape_with_backslash_is_dangerous */ | ||
| 11012 | 3, /* levels_for_compare */ | ||
| 11013 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11014 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11015 | NO_PAD /* pad attribute */ | ||
| 11016 | }; | ||
| 11017 | #endif | ||
| 11018 | |||
| 11019 | CHARSET_INFO my_charset_utf8mb4_eo_0900_as_cs = { /* CHARSET_INFO object for the eo_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the tailoring named esperanto, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 11020 | 296, /* number */ | ||
| 11021 | 0, /* primary_number */ | ||
| 11022 | 0, /* binary_number */ | ||
| 11023 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11024 | MY_UTF8MB4, /* csname */ | ||
| 11025 | MY_UTF8MB4 "_eo_0900_as_cs", /* m_coll_name */ | ||
| 11026 | "", /* comment */ | ||
| 11027 | esperanto, /* tailoring */ | ||
| 11028 | nullptr, /* coll_param */ | ||
| 11029 | ctype_utf8, /* ctype */ | ||
| 11030 | nullptr, /* to_lower */ | ||
| 11031 | nullptr, /* to_upper */ | ||
| 11032 | nullptr, /* sort_order */ | ||
| 11033 | &my_uca_v900, /* uca */ | ||
| 11034 | nullptr, /* tab_to_uni */ | ||
| 11035 | nullptr, /* tab_from_uni */ | ||
| 11036 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11037 | nullptr, /* state_map */ | ||
| 11038 | nullptr, /* ident_map */ | ||
| 11039 | 0, /* strxfrm_multiply */ | ||
| 11040 | 1, /* caseup_multiply */ | ||
| 11041 | 1, /* casedn_multiply */ | ||
| 11042 | 1, /* mbminlen */ | ||
| 11043 | 4, /* mbmaxlen */ | ||
| 11044 | 1, /* mbmaxlenlen */ | ||
| 11045 | 9, /* min_sort_char */ | ||
| 11046 | 0x10FFFF, /* max_sort_char */ | ||
| 11047 | ' ', /* pad char */ | ||
| 11048 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11049 | 3, /* levels_for_compare */ | ||
| 11050 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11051 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11052 | NO_PAD}; /* pad attribute */ | ||
| 11053 | |||
| 11054 | CHARSET_INFO my_charset_utf8mb4_hu_0900_as_cs = { /* CHARSET_INFO object for the hu_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the hu_cldr_30 tailoring, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 11055 | 297, /* number */ | ||
| 11056 | 0, /* primary_number */ | ||
| 11057 | 0, /* binary_number */ | ||
| 11058 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11059 | MY_UTF8MB4, /* csname */ | ||
| 11060 | MY_UTF8MB4 "_hu_0900_as_cs", /* m_coll_name */ | ||
| 11061 | "", /* comment */ | ||
| 11062 | hu_cldr_30, /* tailoring */ | ||
| 11063 | nullptr, /* coll_param */ | ||
| 11064 | ctype_utf8, /* ctype */ | ||
| 11065 | nullptr, /* to_lower */ | ||
| 11066 | nullptr, /* to_upper */ | ||
| 11067 | nullptr, /* sort_order */ | ||
| 11068 | &my_uca_v900, /* uca */ | ||
| 11069 | nullptr, /* tab_to_uni */ | ||
| 11070 | nullptr, /* tab_from_uni */ | ||
| 11071 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11072 | nullptr, /* state_map */ | ||
| 11073 | nullptr, /* ident_map */ | ||
| 11074 | 0, /* strxfrm_multiply */ | ||
| 11075 | 1, /* caseup_multiply */ | ||
| 11076 | 1, /* casedn_multiply */ | ||
| 11077 | 1, /* mbminlen */ | ||
| 11078 | 4, /* mbmaxlen */ | ||
| 11079 | 1, /* mbmaxlenlen */ | ||
| 11080 | 9, /* min_sort_char */ | ||
| 11081 | 0x10FFFF, /* max_sort_char */ | ||
| 11082 | ' ', /* pad char */ | ||
| 11083 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11084 | 3, /* levels_for_compare */ | ||
| 11085 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11086 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11087 | NO_PAD}; /* pad attribute */ | ||
| 11088 | |||
| 11089 | CHARSET_INFO my_charset_utf8mb4_hr_0900_as_cs = { /* CHARSET_INFO object for the hr_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the hr_cldr_30 tailoring and hr_coll_param, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 11090 | 298, /* number */ | ||
| 11091 | 0, /* primary_number */ | ||
| 11092 | 0, /* binary_number */ | ||
| 11093 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11094 | MY_UTF8MB4, /* csname */ | ||
| 11095 | MY_UTF8MB4 "_hr_0900_as_cs", /* m_coll_name */ | ||
| 11096 | "", /* comment */ | ||
| 11097 | hr_cldr_30, /* tailoring */ | ||
| 11098 | &hr_coll_param, /* coll_param: extra parameters defined elsewhere in this file */ | ||
| 11099 | ctype_utf8, /* ctype */ | ||
| 11100 | nullptr, /* to_lower */ | ||
| 11101 | nullptr, /* to_upper */ | ||
| 11102 | nullptr, /* sort_order */ | ||
| 11103 | &my_uca_v900, /* uca */ | ||
| 11104 | nullptr, /* tab_to_uni */ | ||
| 11105 | nullptr, /* tab_from_uni */ | ||
| 11106 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11107 | nullptr, /* state_map */ | ||
| 11108 | nullptr, /* ident_map */ | ||
| 11109 | 0, /* strxfrm_multiply */ | ||
| 11110 | 1, /* caseup_multiply */ | ||
| 11111 | 1, /* casedn_multiply */ | ||
| 11112 | 1, /* mbminlen */ | ||
| 11113 | 4, /* mbmaxlen */ | ||
| 11114 | 1, /* mbmaxlenlen */ | ||
| 11115 | 9, /* min_sort_char */ | ||
| 11116 | 0x10FFFF, /* max_sort_char */ | ||
| 11117 | ' ', /* pad char */ | ||
| 11118 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11119 | 3, /* levels_for_compare */ | ||
| 11120 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11121 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11122 | NO_PAD}; /* pad attribute */ | ||
| 11123 | |||
| 11124 | #if 0 /* Compiled out: the si_0900_as_cs definition below is not built. It still uses NULL and 0 where the active definitions in this file use nullptr and false. NOTE(review): presumably kept as a placeholder for a collation that is not enabled -- confirm before re-enabling. */ | ||
| 11125 | CHARSET_INFO my_charset_utf8mb4_si_0900_as_cs= | ||
| 11126 | { | ||
| 11127 | 299, 0, 0, /* number (label covers all three leading values) */ | ||
| 11128 | MY_CS_UTF8MB4_UCA_FLAGS|MY_CS_CSSORT,/* state */ | ||
| 11129 | MY_UTF8MB4, /* csname */ | ||
| 11130 | MY_UTF8MB4 "_si_0900_as_cs",/* m_coll_name */ | ||
| 11131 | "", /* comment */ | ||
| 11132 | si_cldr_30, /* tailoring */ | ||
| 11133 | NULL, /* coll_param */ | ||
| 11134 | ctype_utf8, /* ctype */ | ||
| 11135 | NULL, /* to_lower */ | ||
| 11136 | NULL, /* to_upper */ | ||
| 11137 | NULL, /* sort_order */ | ||
| 11138 | &my_uca_v900, /* uca */ | ||
| 11139 | NULL, /* tab_to_uni */ | ||
| 11140 | NULL, /* tab_from_uni */ | ||
| 11141 | &my_unicase_unicode900,/* caseinfo */ | ||
| 11142 | NULL, /* state_map */ | ||
| 11143 | NULL, /* ident_map */ | ||
| 11144 | 0, /* strxfrm_multiply */ | ||
| 11145 | 1, /* caseup_multiply */ | ||
| 11146 | 1, /* casedn_multiply */ | ||
| 11147 | 1, /* mbminlen */ | ||
| 11148 | 4, /* mbmaxlen */ | ||
| 11149 | 1, /* mbmaxlenlen */ | ||
| 11150 | 9, /* min_sort_char */ | ||
| 11151 | 0x10FFFF, /* max_sort_char */ | ||
| 11152 | ' ', /* pad char */ | ||
| 11153 | 0, /* escape_with_backslash_is_dangerous */ | ||
| 11154 | 3, /* levels_for_compare */ | ||
| 11155 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11156 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11157 | NO_PAD /* pad attribute */ | ||
| 11158 | }; | ||
| 11159 | #endif | ||
| 11160 | |||
| 11161 | CHARSET_INFO my_charset_utf8mb4_vi_0900_as_cs = { /* CHARSET_INFO object for the vi_0900_as_cs variant of MY_UTF8MB4: my_uca_v900 with the vi_cldr_30 tailoring and vi_coll_param, 3 comparison levels. Positional initializer, so value order is significant; labels on the first three and last three rows are presumed member roles -- TODO confirm against the struct declaration. */ | ||
| 11162 | 300, /* number */ | ||
| 11163 | 0, /* primary_number */ | ||
| 11164 | 0, /* binary_number */ | ||
| 11165 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11166 | MY_UTF8MB4, /* csname */ | ||
| 11167 | MY_UTF8MB4 "_vi_0900_as_cs", /* m_coll_name */ | ||
| 11168 | "", /* comment */ | ||
| 11169 | vi_cldr_30, /* tailoring */ | ||
| 11170 | &vi_coll_param, /* coll_param: extra parameters defined elsewhere in this file */ | ||
| 11171 | ctype_utf8, /* ctype */ | ||
| 11172 | nullptr, /* to_lower */ | ||
| 11173 | nullptr, /* to_upper */ | ||
| 11174 | nullptr, /* sort_order */ | ||
| 11175 | &my_uca_v900, /* uca */ | ||
| 11176 | nullptr, /* tab_to_uni */ | ||
| 11177 | nullptr, /* tab_from_uni */ | ||
| 11178 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11179 | nullptr, /* state_map */ | ||
| 11180 | nullptr, /* ident_map */ | ||
| 11181 | 0, /* strxfrm_multiply */ | ||
| 11182 | 1, /* caseup_multiply */ | ||
| 11183 | 1, /* casedn_multiply */ | ||
| 11184 | 1, /* mbminlen */ | ||
| 11185 | 4, /* mbmaxlen */ | ||
| 11186 | 1, /* mbmaxlenlen */ | ||
| 11187 | 9, /* min_sort_char */ | ||
| 11188 | 0x10FFFF, /* max_sort_char */ | ||
| 11189 | ' ', /* pad char */ | ||
| 11190 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11191 | 3, /* levels_for_compare */ | ||
| 11192 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11193 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11194 | NO_PAD}; /* pad attribute */ | ||
| 11195 | |||
| 11196 | CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs = { /* utf8mb4 collation "_ja_0900_as_cs": ja_cldr_30 tailoring with ja_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11197 | 303, | ||
| 11198 | 0, | ||
| 11199 | 0, /* number */ | ||
| 11200 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11201 | MY_UTF8MB4, /* csname */ | ||
| 11202 | MY_UTF8MB4 "_ja_0900_as_cs", /* m_coll_name */ | ||
| 11203 | "", /* comment */ | ||
| 11204 | ja_cldr_30, /* tailoring */ | ||
| 11205 | &ja_coll_param, /* coll_param */ | ||
| 11206 | ctype_utf8, /* ctype */ | ||
| 11207 | nullptr, /* to_lower */ | ||
| 11208 | nullptr, /* to_upper */ | ||
| 11209 | nullptr, /* sort_order */ | ||
| 11210 | &my_uca_v900, /* uca */ | ||
| 11211 | nullptr, /* tab_to_uni */ | ||
| 11212 | nullptr, /* tab_from_uni */ | ||
| 11213 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11214 | nullptr, /* state_map */ | ||
| 11215 | nullptr, /* ident_map */ | ||
| 11216 | 0, /* strxfrm_multiply */ | ||
| 11217 | 1, /* caseup_multiply */ | ||
| 11218 | 1, /* casedn_multiply */ | ||
| 11219 | 1, /* mbminlen */ | ||
| 11220 | 4, /* mbmaxlen */ | ||
| 11221 | 1, /* mbmaxlenlen */ | ||
| 11222 | 32, /* min_sort_char */ | ||
| 11223 | 0x10FFFF, /* max_sort_char */ | ||
| 11224 | ' ', /* pad char */ | ||
| 11225 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11226 | 3, /* levels_for_compare */ | ||
| 11227 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11228 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11229 | NO_PAD}; /* pad attribute */ | ||
| 11230 | |||
| 11231 | CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs_ks = { /* utf8mb4 collation "_ja_0900_as_cs_ks": same ja_cldr_30 tailoring and ja_coll_param as "_ja_0900_as_cs", but with 4 compare levels and strxfrm_multiply 24; NO_PAD */ | ||
| 11232 | 304, | ||
| 11233 | 0, | ||
| 11234 | 0, /* number */ | ||
| 11235 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11236 | MY_UTF8MB4, /* csname */ | ||
| 11237 | MY_UTF8MB4 "_ja_0900_as_cs_ks", /* m_coll_name */ | ||
| 11238 | "", /* comment */ | ||
| 11239 | ja_cldr_30, /* tailoring */ | ||
| 11240 | &ja_coll_param, /* coll_param */ | ||
| 11241 | ctype_utf8, /* ctype */ | ||
| 11242 | nullptr, /* to_lower */ | ||
| 11243 | nullptr, /* to_upper */ | ||
| 11244 | nullptr, /* sort_order */ | ||
| 11245 | &my_uca_v900, /* uca */ | ||
| 11246 | nullptr, /* tab_to_uni */ | ||
| 11247 | nullptr, /* tab_from_uni */ | ||
| 11248 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11249 | nullptr, /* state_map */ | ||
| 11250 | nullptr, /* ident_map */ | ||
| 11251 | 24, /* strxfrm_multiply */ | ||
| 11252 | 1, /* caseup_multiply */ | ||
| 11253 | 1, /* casedn_multiply */ | ||
| 11254 | 1, /* mbminlen */ | ||
| 11255 | 4, /* mbmaxlen */ | ||
| 11256 | 1, /* mbmaxlenlen */ | ||
| 11257 | 32, /* min_sort_char */ | ||
| 11258 | 0x10FFFF, /* max_sort_char */ | ||
| 11259 | ' ', /* pad char */ | ||
| 11260 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11261 | 4, /* levels_for_compare */ | ||
| 11262 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11263 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11264 | NO_PAD}; /* pad attribute */ | ||
| 11265 | |||
| 11266 | CHARSET_INFO my_charset_utf8mb4_0900_as_ci = { /* utf8mb4 collation "_0900_as_ci": my_uca_v900 with no tailoring and no coll_param, MY_CS_CSSORT not set, 2 compare levels, NO_PAD */ | ||
| 11267 | 305, | ||
| 11268 | 0, | ||
| 11269 | 0, /* number */ | ||
| 11270 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11271 | MY_UTF8MB4, /* csname */ | ||
| 11272 | MY_UTF8MB4 "_0900_as_ci", /* m_coll_name */ | ||
| 11273 | "", /* comment */ | ||
| 11274 | nullptr, /* tailoring */ | ||
| 11275 | nullptr, /* coll_param */ | ||
| 11276 | ctype_utf8, /* ctype */ | ||
| 11277 | nullptr, /* to_lower */ | ||
| 11278 | nullptr, /* to_upper */ | ||
| 11279 | nullptr, /* sort_order */ | ||
| 11280 | &my_uca_v900, /* uca */ | ||
| 11281 | nullptr, /* tab_to_uni */ | ||
| 11282 | nullptr, /* tab_from_uni */ | ||
| 11283 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11284 | nullptr, /* state_map */ | ||
| 11285 | nullptr, /* ident_map */ | ||
| 11286 | 0, /* strxfrm_multiply */ | ||
| 11287 | 1, /* caseup_multiply */ | ||
| 11288 | 1, /* casedn_multiply */ | ||
| 11289 | 1, /* mbminlen */ | ||
| 11290 | 4, /* mbmaxlen */ | ||
| 11291 | 1, /* mbmaxlenlen */ | ||
| 11292 | 32, /* min_sort_char */ | ||
| 11293 | 0x10FFFF, /* max_sort_char */ | ||
| 11294 | ' ', /* pad char */ | ||
| 11295 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11296 | 2, /* levels_for_compare */ | ||
| 11297 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11298 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11299 | NO_PAD}; /* pad attribute */ | ||
| 11300 | |||
| 11301 | CHARSET_INFO my_charset_utf8mb4_ru_0900_ai_ci = { /* utf8mb4 collation "_ru_0900_ai_ci": empty tailoring string with ru_coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11302 | 306, | ||
| 11303 | 0, | ||
| 11304 | 0, /* number */ | ||
| 11305 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11306 | MY_UTF8MB4, /* csname */ | ||
| 11307 | MY_UTF8MB4 "_ru_0900_ai_ci", /* m_coll_name */ | ||
| 11308 | "", /* comment */ | ||
| 11309 | "", /* tailoring */ | ||
| 11310 | &ru_coll_param, /* coll_param */ | ||
| 11311 | ctype_utf8, /* ctype */ | ||
| 11312 | nullptr, /* to_lower */ | ||
| 11313 | nullptr, /* to_upper */ | ||
| 11314 | nullptr, /* sort_order */ | ||
| 11315 | &my_uca_v900, /* uca */ | ||
| 11316 | nullptr, /* tab_to_uni */ | ||
| 11317 | nullptr, /* tab_from_uni */ | ||
| 11318 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11319 | nullptr, /* state_map */ | ||
| 11320 | nullptr, /* ident_map */ | ||
| 11321 | 0, /* strxfrm_multiply */ | ||
| 11322 | 1, /* caseup_multiply */ | ||
| 11323 | 1, /* casedn_multiply */ | ||
| 11324 | 1, /* mbminlen */ | ||
| 11325 | 4, /* mbmaxlen */ | ||
| 11326 | 1, /* mbmaxlenlen */ | ||
| 11327 | 32, /* min_sort_char */ | ||
| 11328 | 0x10FFFF, /* max_sort_char */ | ||
| 11329 | ' ', /* pad char */ | ||
| 11330 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11331 | 1, /* levels_for_compare */ | ||
| 11332 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11333 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11334 | NO_PAD}; /* pad attribute */ | ||
| 11335 | |||
| 11336 | CHARSET_INFO my_charset_utf8mb4_ru_0900_as_cs = { /* utf8mb4 collation "_ru_0900_as_cs": empty tailoring string with ru_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11337 | 307, | ||
| 11338 | 0, | ||
| 11339 | 0, /* number */ | ||
| 11340 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11341 | MY_UTF8MB4, /* csname */ | ||
| 11342 | MY_UTF8MB4 "_ru_0900_as_cs", /* m_coll_name */ | ||
| 11343 | "", /* comment */ | ||
| 11344 | "", /* tailoring */ | ||
| 11345 | &ru_coll_param, /* coll_param */ | ||
| 11346 | ctype_utf8, /* ctype */ | ||
| 11347 | nullptr, /* to_lower */ | ||
| 11348 | nullptr, /* to_upper */ | ||
| 11349 | nullptr, /* sort_order */ | ||
| 11350 | &my_uca_v900, /* uca */ | ||
| 11351 | nullptr, /* tab_to_uni */ | ||
| 11352 | nullptr, /* tab_from_uni */ | ||
| 11353 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11354 | nullptr, /* state_map */ | ||
| 11355 | nullptr, /* ident_map */ | ||
| 11356 | 0, /* strxfrm_multiply */ | ||
| 11357 | 1, /* caseup_multiply */ | ||
| 11358 | 1, /* casedn_multiply */ | ||
| 11359 | 1, /* mbminlen */ | ||
| 11360 | 4, /* mbmaxlen */ | ||
| 11361 | 1, /* mbmaxlenlen */ | ||
| 11362 | 32, /* min_sort_char */ | ||
| 11363 | 0x10FFFF, /* max_sort_char */ | ||
| 11364 | ' ', /* pad char */ | ||
| 11365 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11366 | 3, /* levels_for_compare */ | ||
| 11367 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11368 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11369 | NO_PAD}; /* pad attribute */ | ||
| 11370 | |||
| 11371 | CHARSET_INFO my_charset_utf8mb4_zh_0900_as_cs = { /* utf8mb4 collation "_zh_0900_as_cs": zh_cldr_30 tailoring with zh_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11372 | 308, | ||
| 11373 | 0, | ||
| 11374 | 0, /* number */ | ||
| 11375 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11376 | MY_UTF8MB4, /* csname */ | ||
| 11377 | MY_UTF8MB4 "_zh_0900_as_cs", /* m_coll_name */ | ||
| 11378 | "", /* comment */ | ||
| 11379 | zh_cldr_30, /* tailoring */ | ||
| 11380 | &zh_coll_param, /* coll_param */ | ||
| 11381 | ctype_utf8, /* ctype */ | ||
| 11382 | nullptr, /* to_lower */ | ||
| 11383 | nullptr, /* to_upper */ | ||
| 11384 | nullptr, /* sort_order */ | ||
| 11385 | &my_uca_v900, /* uca */ | ||
| 11386 | nullptr, /* tab_to_uni */ | ||
| 11387 | nullptr, /* tab_from_uni */ | ||
| 11388 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11389 | nullptr, /* state_map */ | ||
| 11390 | nullptr, /* ident_map */ | ||
| 11391 | 0, /* strxfrm_multiply */ | ||
| 11392 | 1, /* caseup_multiply */ | ||
| 11393 | 1, /* casedn_multiply */ | ||
| 11394 | 1, /* mbminlen */ | ||
| 11395 | 4, /* mbmaxlen */ | ||
| 11396 | 1, /* mbmaxlenlen */ | ||
| 11397 | 32, /* min_sort_char */ | ||
| 11398 | 0x10FFFF, /* max_sort_char */ | ||
| 11399 | ' ', /* pad char */ | ||
| 11400 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11401 | 3, /* levels_for_compare */ | ||
| 11402 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11403 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11404 | NO_PAD}; /* pad attribute */ | ||
| 11405 | |||
| 11406 | /* | ||
| 11407 | Comparing the UTF-8 representation automatically yields codepoint order, | ||
| 11408 | so we can just do a binary comparison. Note that | ||
| 11409 | my_strnxfrm_unicode_full_bin() chooses to transform to UCS before collation; | ||
| 11410 | this is purely for legacy reasons and is not needed here. | ||
| 11411 | The sort key is the first min(srclen, dstlen) bytes of src copied into dst. With MY_STRXFRM_PAD_TO_MAXLEN in flags the rest of dst is zero-filled and dstlen is returned; otherwise the number of bytes copied is returned. cs and nweights are unused. */ | ||
| 11412 | 3343047 | static size_t my_strnxfrm_utf8mb4_0900_bin(const CHARSET_INFO *cs | |
| 11413 | [[maybe_unused]], | ||
| 11414 | uchar *dst, size_t dstlen, | ||
| 11415 | uint nweights [[maybe_unused]], | ||
| 11416 | const uchar *src, size_t srclen, | ||
| 11417 | uint flags) { | ||
| 11418 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3343047 times.
|
3343047 | assert(src); |
| 11419 | |||
| 11420 | 3343047 | size_t weight_len = std::min<size_t>(srclen, dstlen); /* never copy more than dst can hold */ | |
| 11421 | 3343047 | memcpy(dst, src, weight_len); | |
| 11422 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 3342969 times.
|
3343047 | if (flags & MY_STRXFRM_PAD_TO_MAXLEN) { |
| 11423 | 78 | memset(dst + weight_len, 0, dstlen - weight_len); /* zero-fill the unused tail of dst */ | |
| 11424 | 78 | return dstlen; /* padded key occupies the whole buffer */ | |
| 11425 | } else { | ||
| 11426 | 3342969 | return weight_len; /* unpadded: only the bytes actually copied */ | |
| 11427 | } | ||
| 11428 | } | ||
| 11429 | |||
| 11430 | 1377502 | static int my_strnncollsp_utf8mb4_0900_bin(const CHARSET_INFO *cs, | |
| 11431 | const uchar *s, size_t slen, | ||
| 11432 | const uchar *t, size_t tlen) { /* Forwards all arguments unchanged to my_strnncoll_mb_bin, with its final argument fixed to false, and returns that result */ | ||
| 11433 | 1377502 | return my_strnncoll_mb_bin(cs, s, slen, t, tlen, false); | |
| 11434 | } | ||
| 11435 | |||
| 11436 | static MY_COLLATION_HANDLER my_collation_utf8mb4_0900_bin_handler = { /* Collation handler referenced by my_charset_utf8mb4_0900_bin. NOTE(review): slot labels below other than init are inferred from the function names; confirm against the MY_COLLATION_HANDLER declaration */ | ||
| 11437 | nullptr, /* init */ | ||
| 11438 | nullptr, /* presumably uninit */ | ||
| 11439 | my_strnncoll_mb_bin, /* presumably strnncoll */ | ||
| 11440 | my_strnncollsp_utf8mb4_0900_bin, /* presumably strnncollsp; defined in this file */ | ||
| 11441 | my_strnxfrm_utf8mb4_0900_bin, /* presumably strnxfrm; defined in this file */ | ||
| 11442 | my_strnxfrmlen_simple, /* presumably strnxfrmlen */ | ||
| 11443 | my_like_range_mb, /* presumably like_range */ | ||
| 11444 | my_wildcmp_mb_bin, /* presumably wildcmp */ | ||
| 11445 | my_strcasecmp_mb_bin, /* presumably strcasecmp */ | ||
| 11446 | my_instr_mb, /* presumably instr */ | ||
| 11447 | my_hash_sort_mb_bin, /* presumably hash_sort */ | ||
| 11448 | my_propagate_simple}; /* presumably propagate */ | ||
| 11449 | |||
| 11450 | CHARSET_INFO my_charset_utf8mb4_0900_bin = { /* utf8mb4 collation "_0900_bin": MY_CS_BINSORT set, no uca data or tailoring, uses my_collation_utf8mb4_0900_bin_handler instead of the UCA 900 handler, 1 compare level, NO_PAD */ | ||
| 11451 | 309, | ||
| 11452 | 0, | ||
| 11453 | 0, // number | ||
| 11454 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_BINSORT, // state | ||
| 11455 | MY_UTF8MB4, // csname | ||
| 11456 | MY_UTF8MB4 "_0900_bin", // m_coll_name | ||
| 11457 | "", // comment | ||
| 11458 | nullptr, // tailoring | ||
| 11459 | nullptr, // coll_param | ||
| 11460 | ctype_utf8, // ctype | ||
| 11461 | nullptr, // to_lower | ||
| 11462 | nullptr, // to_upper | ||
| 11463 | nullptr, // sort_order | ||
| 11464 | nullptr, // uca | ||
| 11465 | nullptr, // tab_to_uni | ||
| 11466 | nullptr, // tab_from_uni | ||
| 11467 | &my_unicase_unicode900, // caseinfo | ||
| 11468 | nullptr, // state_map | ||
| 11469 | nullptr, // ident_map | ||
| 11470 | 1, // strxfrm_multiply | ||
| 11471 | 1, // caseup_multiply | ||
| 11472 | 1, // casedn_multiply | ||
| 11473 | 1, // mbminlen | ||
| 11474 | 4, // mbmaxlen | ||
| 11475 | 1, // mbmaxlenlen | ||
| 11476 | 0, // min_sort_char | ||
| 11477 | 0x10FFFF, // max_sort_char | ||
| 11478 | ' ', // pad char | ||
| 11479 | false, // escape_with_backslash_is_dangerous | ||
| 11480 | 1, // levels_for_compare | ||
| 11481 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11482 | &my_collation_utf8mb4_0900_bin_handler, /* collation handler */ | ||
| 11483 | NO_PAD}; /* pad attribute */ | ||
| 11484 | |||
| 11485 | CHARSET_INFO my_charset_utf8mb4_nb_0900_ai_ci = { /* utf8mb4 collation "_nb_0900_ai_ci": reuses the da_cldr_30 tailoring with no coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11486 | 310, | ||
| 11487 | 0, | ||
| 11488 | 0, /* number */ | ||
| 11489 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11490 | MY_UTF8MB4, /* csname */ | ||
| 11491 | MY_UTF8MB4 "_nb_0900_ai_ci", /* m_coll_name */ | ||
| 11492 | "", /* comment */ | ||
| 11493 | da_cldr_30, /* tailoring */ | ||
| 11494 | nullptr, /* coll_param */ | ||
| 11495 | ctype_utf8, /* ctype */ | ||
| 11496 | nullptr, /* to_lower */ | ||
| 11497 | nullptr, /* to_upper */ | ||
| 11498 | nullptr, /* sort_order */ | ||
| 11499 | &my_uca_v900, /* uca */ | ||
| 11500 | nullptr, /* tab_to_uni */ | ||
| 11501 | nullptr, /* tab_from_uni */ | ||
| 11502 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11503 | nullptr, /* state_map */ | ||
| 11504 | nullptr, /* ident_map */ | ||
| 11505 | 0, /* strxfrm_multiply */ | ||
| 11506 | 1, /* caseup_multiply */ | ||
| 11507 | 1, /* casedn_multiply */ | ||
| 11508 | 1, /* mbminlen */ | ||
| 11509 | 4, /* mbmaxlen */ | ||
| 11510 | 1, /* mbmaxlenlen */ | ||
| 11511 | 9, /* min_sort_char */ | ||
| 11512 | 0x10FFFF, /* max_sort_char */ | ||
| 11513 | ' ', /* pad char */ | ||
| 11514 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11515 | 1, /* levels_for_compare */ | ||
| 11516 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11517 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11518 | NO_PAD}; /* pad attribute */ | ||
| 11519 | |||
| 11520 | CHARSET_INFO my_charset_utf8mb4_nb_0900_as_cs = { /* utf8mb4 collation "_nb_0900_as_cs": reuses the da_cldr_30 tailoring with no_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11521 | 311, | ||
| 11522 | 0, | ||
| 11523 | 0, /* number */ | ||
| 11524 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11525 | MY_UTF8MB4, /* csname */ | ||
| 11526 | MY_UTF8MB4 "_nb_0900_as_cs", /* m_coll_name */ | ||
| 11527 | "", /* comment */ | ||
| 11528 | da_cldr_30, /* tailoring */ | ||
| 11529 | &no_coll_param, /* coll_param */ | ||
| 11530 | ctype_utf8, /* ctype */ | ||
| 11531 | nullptr, /* to_lower */ | ||
| 11532 | nullptr, /* to_upper */ | ||
| 11533 | nullptr, /* sort_order */ | ||
| 11534 | &my_uca_v900, /* uca */ | ||
| 11535 | nullptr, /* tab_to_uni */ | ||
| 11536 | nullptr, /* tab_from_uni */ | ||
| 11537 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11538 | nullptr, /* state_map */ | ||
| 11539 | nullptr, /* ident_map */ | ||
| 11540 | 0, /* strxfrm_multiply */ | ||
| 11541 | 1, /* caseup_multiply */ | ||
| 11542 | 1, /* casedn_multiply */ | ||
| 11543 | 1, /* mbminlen */ | ||
| 11544 | 4, /* mbmaxlen */ | ||
| 11545 | 1, /* mbmaxlenlen */ | ||
| 11546 | 9, /* min_sort_char */ | ||
| 11547 | 0x10FFFF, /* max_sort_char */ | ||
| 11548 | ' ', /* pad char */ | ||
| 11549 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11550 | 3, /* levels_for_compare */ | ||
| 11551 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11552 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11553 | NO_PAD}; /* pad attribute */ | ||
| 11554 | |||
| 11555 | CHARSET_INFO my_charset_utf8mb4_nn_0900_ai_ci = { /* utf8mb4 collation "_nn_0900_ai_ci": reuses the da_cldr_30 tailoring with no coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11556 | 312, | ||
| 11557 | 0, | ||
| 11558 | 0, /* number */ | ||
| 11559 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11560 | MY_UTF8MB4, /* csname */ | ||
| 11561 | MY_UTF8MB4 "_nn_0900_ai_ci", /* m_coll_name */ | ||
| 11562 | "", /* comment */ | ||
| 11563 | da_cldr_30, /* tailoring */ | ||
| 11564 | nullptr, /* coll_param */ | ||
| 11565 | ctype_utf8, /* ctype */ | ||
| 11566 | nullptr, /* to_lower */ | ||
| 11567 | nullptr, /* to_upper */ | ||
| 11568 | nullptr, /* sort_order */ | ||
| 11569 | &my_uca_v900, /* uca */ | ||
| 11570 | nullptr, /* tab_to_uni */ | ||
| 11571 | nullptr, /* tab_from_uni */ | ||
| 11572 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11573 | nullptr, /* state_map */ | ||
| 11574 | nullptr, /* ident_map */ | ||
| 11575 | 0, /* strxfrm_multiply */ | ||
| 11576 | 1, /* caseup_multiply */ | ||
| 11577 | 1, /* casedn_multiply */ | ||
| 11578 | 1, /* mbminlen */ | ||
| 11579 | 4, /* mbmaxlen */ | ||
| 11580 | 1, /* mbmaxlenlen */ | ||
| 11581 | 9, /* min_sort_char */ | ||
| 11582 | 0x10FFFF, /* max_sort_char */ | ||
| 11583 | ' ', /* pad char */ | ||
| 11584 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11585 | 1, /* levels_for_compare */ | ||
| 11586 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11587 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11588 | NO_PAD}; /* pad attribute */ | ||
| 11589 | |||
| 11590 | CHARSET_INFO my_charset_utf8mb4_nn_0900_as_cs = { /* utf8mb4 collation "_nn_0900_as_cs": reuses the da_cldr_30 tailoring with no_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11591 | 313, | ||
| 11592 | 0, | ||
| 11593 | 0, /* number */ | ||
| 11594 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11595 | MY_UTF8MB4, /* csname */ | ||
| 11596 | MY_UTF8MB4 "_nn_0900_as_cs", /* m_coll_name */ | ||
| 11597 | "", /* comment */ | ||
| 11598 | da_cldr_30, /* tailoring */ | ||
| 11599 | &no_coll_param, /* coll_param */ | ||
| 11600 | ctype_utf8, /* ctype */ | ||
| 11601 | nullptr, /* to_lower */ | ||
| 11602 | nullptr, /* to_upper */ | ||
| 11603 | nullptr, /* sort_order */ | ||
| 11604 | &my_uca_v900, /* uca */ | ||
| 11605 | nullptr, /* tab_to_uni */ | ||
| 11606 | nullptr, /* tab_from_uni */ | ||
| 11607 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11608 | nullptr, /* state_map */ | ||
| 11609 | nullptr, /* ident_map */ | ||
| 11610 | 0, /* strxfrm_multiply */ | ||
| 11611 | 1, /* caseup_multiply */ | ||
| 11612 | 1, /* casedn_multiply */ | ||
| 11613 | 1, /* mbminlen */ | ||
| 11614 | 4, /* mbmaxlen */ | ||
| 11615 | 1, /* mbmaxlenlen */ | ||
| 11616 | 9, /* min_sort_char */ | ||
| 11617 | 0x10FFFF, /* max_sort_char */ | ||
| 11618 | ' ', /* pad char */ | ||
| 11619 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11620 | 3, /* levels_for_compare */ | ||
| 11621 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11622 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11623 | NO_PAD}; /* pad attribute */ | ||
| 11624 | |||
| 11625 | CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_ai_ci = { /* utf8mb4 collation "_sr_latn_0900_ai_ci": reuses the hr_cldr_30 tailoring and hr_coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11626 | 314, | ||
| 11627 | 0, | ||
| 11628 | 0, /* number */ | ||
| 11629 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11630 | MY_UTF8MB4, /* csname */ | ||
| 11631 | MY_UTF8MB4 "_sr_latn_0900_ai_ci", /* m_coll_name */ | ||
| 11632 | "", /* comment */ | ||
| 11633 | hr_cldr_30, /* tailoring */ | ||
| 11634 | &hr_coll_param, /* coll_param */ | ||
| 11635 | ctype_utf8, /* ctype */ | ||
| 11636 | nullptr, /* to_lower */ | ||
| 11637 | nullptr, /* to_upper */ | ||
| 11638 | nullptr, /* sort_order */ | ||
| 11639 | &my_uca_v900, /* uca */ | ||
| 11640 | nullptr, /* tab_to_uni */ | ||
| 11641 | nullptr, /* tab_from_uni */ | ||
| 11642 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11643 | nullptr, /* state_map */ | ||
| 11644 | nullptr, /* ident_map */ | ||
| 11645 | 0, /* strxfrm_multiply */ | ||
| 11646 | 1, /* caseup_multiply */ | ||
| 11647 | 1, /* casedn_multiply */ | ||
| 11648 | 1, /* mbminlen */ | ||
| 11649 | 4, /* mbmaxlen */ | ||
| 11650 | 1, /* mbmaxlenlen */ | ||
| 11651 | 9, /* min_sort_char */ | ||
| 11652 | 0x10FFFF, /* max_sort_char */ | ||
| 11653 | ' ', /* pad char */ | ||
| 11654 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11655 | 1, /* levels_for_compare */ | ||
| 11656 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11657 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11658 | NO_PAD}; /* pad attribute */ | ||
| 11659 | |||
| 11660 | CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_as_cs = { /* utf8mb4 collation "_sr_latn_0900_as_cs": reuses the hr_cldr_30 tailoring and hr_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11661 | 315, | ||
| 11662 | 0, | ||
| 11663 | 0, /* number */ | ||
| 11664 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11665 | MY_UTF8MB4, /* csname */ | ||
| 11666 | MY_UTF8MB4 "_sr_latn_0900_as_cs", /* m_coll_name */ | ||
| 11667 | "", /* comment */ | ||
| 11668 | hr_cldr_30, /* tailoring */ | ||
| 11669 | &hr_coll_param, /* coll_param */ | ||
| 11670 | ctype_utf8, /* ctype */ | ||
| 11671 | nullptr, /* to_lower */ | ||
| 11672 | nullptr, /* to_upper */ | ||
| 11673 | nullptr, /* sort_order */ | ||
| 11674 | &my_uca_v900, /* uca */ | ||
| 11675 | nullptr, /* tab_to_uni */ | ||
| 11676 | nullptr, /* tab_from_uni */ | ||
| 11677 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11678 | nullptr, /* state_map */ | ||
| 11679 | nullptr, /* ident_map */ | ||
| 11680 | 0, /* strxfrm_multiply */ | ||
| 11681 | 1, /* caseup_multiply */ | ||
| 11682 | 1, /* casedn_multiply */ | ||
| 11683 | 1, /* mbminlen */ | ||
| 11684 | 4, /* mbmaxlen */ | ||
| 11685 | 1, /* mbmaxlenlen */ | ||
| 11686 | 9, /* min_sort_char */ | ||
| 11687 | 0x10FFFF, /* max_sort_char */ | ||
| 11688 | ' ', /* pad char */ | ||
| 11689 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11690 | 3, /* levels_for_compare */ | ||
| 11691 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11692 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11693 | NO_PAD}; /* pad attribute */ | ||
| 11694 | |||
| 11695 | CHARSET_INFO my_charset_utf8mb4_bs_0900_ai_ci = { /* utf8mb4 collation "_bs_0900_ai_ci": reuses the hr_cldr_30 tailoring and hr_coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11696 | 316, | ||
| 11697 | 0, | ||
| 11698 | 0, /* number */ | ||
| 11699 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11700 | MY_UTF8MB4, /* csname */ | ||
| 11701 | MY_UTF8MB4 "_bs_0900_ai_ci", /* m_coll_name */ | ||
| 11702 | "", /* comment */ | ||
| 11703 | hr_cldr_30, /* tailoring */ | ||
| 11704 | &hr_coll_param, /* coll_param */ | ||
| 11705 | ctype_utf8, /* ctype */ | ||
| 11706 | nullptr, /* to_lower */ | ||
| 11707 | nullptr, /* to_upper */ | ||
| 11708 | nullptr, /* sort_order */ | ||
| 11709 | &my_uca_v900, /* uca */ | ||
| 11710 | nullptr, /* tab_to_uni */ | ||
| 11711 | nullptr, /* tab_from_uni */ | ||
| 11712 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11713 | nullptr, /* state_map */ | ||
| 11714 | nullptr, /* ident_map */ | ||
| 11715 | 0, /* strxfrm_multiply */ | ||
| 11716 | 1, /* caseup_multiply */ | ||
| 11717 | 1, /* casedn_multiply */ | ||
| 11718 | 1, /* mbminlen */ | ||
| 11719 | 4, /* mbmaxlen */ | ||
| 11720 | 1, /* mbmaxlenlen */ | ||
| 11721 | 9, /* min_sort_char */ | ||
| 11722 | 0x10FFFF, /* max_sort_char */ | ||
| 11723 | ' ', /* pad char */ | ||
| 11724 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11725 | 1, /* levels_for_compare */ | ||
| 11726 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11727 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11728 | NO_PAD}; /* pad attribute */ | ||
| 11729 | |||
| 11730 | CHARSET_INFO my_charset_utf8mb4_bs_0900_as_cs = { /* utf8mb4 collation "_bs_0900_as_cs": reuses the hr_cldr_30 tailoring and hr_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11731 | 317, | ||
| 11732 | 0, | ||
| 11733 | 0, /* number */ | ||
| 11734 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11735 | MY_UTF8MB4, /* csname */ | ||
| 11736 | MY_UTF8MB4 "_bs_0900_as_cs", /* m_coll_name */ | ||
| 11737 | "", /* comment */ | ||
| 11738 | hr_cldr_30, /* tailoring */ | ||
| 11739 | &hr_coll_param, /* coll_param */ | ||
| 11740 | ctype_utf8, /* ctype */ | ||
| 11741 | nullptr, /* to_lower */ | ||
| 11742 | nullptr, /* to_upper */ | ||
| 11743 | nullptr, /* sort_order */ | ||
| 11744 | &my_uca_v900, /* uca */ | ||
| 11745 | nullptr, /* tab_to_uni */ | ||
| 11746 | nullptr, /* tab_from_uni */ | ||
| 11747 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11748 | nullptr, /* state_map */ | ||
| 11749 | nullptr, /* ident_map */ | ||
| 11750 | 0, /* strxfrm_multiply */ | ||
| 11751 | 1, /* caseup_multiply */ | ||
| 11752 | 1, /* casedn_multiply */ | ||
| 11753 | 1, /* mbminlen */ | ||
| 11754 | 4, /* mbmaxlen */ | ||
| 11755 | 1, /* mbmaxlenlen */ | ||
| 11756 | 9, /* min_sort_char */ | ||
| 11757 | 0x10FFFF, /* max_sort_char */ | ||
| 11758 | ' ', /* pad char */ | ||
| 11759 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11760 | 3, /* levels_for_compare */ | ||
| 11761 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11762 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11763 | NO_PAD}; /* pad attribute */ | ||
| 11764 | |||
| 11765 | CHARSET_INFO my_charset_utf8mb4_bg_0900_ai_ci = { /* utf8mb4 collation "_bg_0900_ai_ci": empty tailoring string with the shared ru_coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11766 | 318, | ||
| 11767 | 0, | ||
| 11768 | 0, /* number */ | ||
| 11769 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11770 | MY_UTF8MB4, /* csname */ | ||
| 11771 | MY_UTF8MB4 "_bg_0900_ai_ci", /* m_coll_name */ | ||
| 11772 | "", /* comment */ | ||
| 11773 | "", /* tailoring */ | ||
| 11774 | &ru_coll_param, /* coll_param */ | ||
| 11775 | ctype_utf8, /* ctype */ | ||
| 11776 | nullptr, /* to_lower */ | ||
| 11777 | nullptr, /* to_upper */ | ||
| 11778 | nullptr, /* sort_order */ | ||
| 11779 | &my_uca_v900, /* uca */ | ||
| 11780 | nullptr, /* tab_to_uni */ | ||
| 11781 | nullptr, /* tab_from_uni */ | ||
| 11782 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11783 | nullptr, /* state_map */ | ||
| 11784 | nullptr, /* ident_map */ | ||
| 11785 | 0, /* strxfrm_multiply */ | ||
| 11786 | 1, /* caseup_multiply */ | ||
| 11787 | 1, /* casedn_multiply */ | ||
| 11788 | 1, /* mbminlen */ | ||
| 11789 | 4, /* mbmaxlen */ | ||
| 11790 | 1, /* mbmaxlenlen */ | ||
| 11791 | 32, /* min_sort_char */ | ||
| 11792 | 0x10FFFF, /* max_sort_char */ | ||
| 11793 | ' ', /* pad char */ | ||
| 11794 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11795 | 1, /* levels_for_compare */ | ||
| 11796 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11797 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11798 | NO_PAD}; /* pad attribute */ | ||
| 11799 | |||
| 11800 | CHARSET_INFO my_charset_utf8mb4_bg_0900_as_cs = { /* utf8mb4 collation "_bg_0900_as_cs": empty tailoring string with the shared ru_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11801 | 319, | ||
| 11802 | 0, | ||
| 11803 | 0, /* number */ | ||
| 11804 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11805 | MY_UTF8MB4, /* csname */ | ||
| 11806 | MY_UTF8MB4 "_bg_0900_as_cs", /* m_coll_name */ | ||
| 11807 | "", /* comment */ | ||
| 11808 | "", /* tailoring */ | ||
| 11809 | &ru_coll_param, /* coll_param */ | ||
| 11810 | ctype_utf8, /* ctype */ | ||
| 11811 | nullptr, /* to_lower */ | ||
| 11812 | nullptr, /* to_upper */ | ||
| 11813 | nullptr, /* sort_order */ | ||
| 11814 | &my_uca_v900, /* uca */ | ||
| 11815 | nullptr, /* tab_to_uni */ | ||
| 11816 | nullptr, /* tab_from_uni */ | ||
| 11817 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11818 | nullptr, /* state_map */ | ||
| 11819 | nullptr, /* ident_map */ | ||
| 11820 | 0, /* strxfrm_multiply */ | ||
| 11821 | 1, /* caseup_multiply */ | ||
| 11822 | 1, /* casedn_multiply */ | ||
| 11823 | 1, /* mbminlen */ | ||
| 11824 | 4, /* mbmaxlen */ | ||
| 11825 | 1, /* mbmaxlenlen */ | ||
| 11826 | 32, /* min_sort_char */ | ||
| 11827 | 0x10FFFF, /* max_sort_char */ | ||
| 11828 | ' ', /* pad char */ | ||
| 11829 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11830 | 3, /* levels_for_compare */ | ||
| 11831 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11832 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11833 | NO_PAD}; /* pad attribute */ | ||
| 11834 | |||
| 11835 | CHARSET_INFO my_charset_utf8mb4_gl_0900_ai_ci = { /* utf8mb4 collation "_gl_0900_ai_ci": reuses the spanish tailoring with no coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11836 | 320, | ||
| 11837 | 0, | ||
| 11838 | 0, /* number */ | ||
| 11839 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11840 | MY_UTF8MB4, /* csname */ | ||
| 11841 | MY_UTF8MB4 "_gl_0900_ai_ci", /* m_coll_name */ | ||
| 11842 | "", /* comment */ | ||
| 11843 | spanish, /* tailoring */ | ||
| 11844 | nullptr, /* coll_param */ | ||
| 11845 | ctype_utf8, /* ctype */ | ||
| 11846 | nullptr, /* to_lower */ | ||
| 11847 | nullptr, /* to_upper */ | ||
| 11848 | nullptr, /* sort_order */ | ||
| 11849 | &my_uca_v900, /* uca */ | ||
| 11850 | nullptr, /* tab_to_uni */ | ||
| 11851 | nullptr, /* tab_from_uni */ | ||
| 11852 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11853 | nullptr, /* state_map */ | ||
| 11854 | nullptr, /* ident_map */ | ||
| 11855 | 0, /* strxfrm_multiply */ | ||
| 11856 | 1, /* caseup_multiply */ | ||
| 11857 | 1, /* casedn_multiply */ | ||
| 11858 | 1, /* mbminlen */ | ||
| 11859 | 4, /* mbmaxlen */ | ||
| 11860 | 1, /* mbmaxlenlen */ | ||
| 11861 | 9, /* min_sort_char */ | ||
| 11862 | 0x10FFFF, /* max_sort_char */ | ||
| 11863 | ' ', /* pad char */ | ||
| 11864 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11865 | 1, /* levels_for_compare */ | ||
| 11866 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11867 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11868 | NO_PAD}; /* pad attribute */ | ||
| 11869 | |||
| 11870 | CHARSET_INFO my_charset_utf8mb4_gl_0900_as_cs = { /* utf8mb4 collation "_gl_0900_as_cs": reuses the spanish tailoring with no coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11871 | 321, | ||
| 11872 | 0, | ||
| 11873 | 0, /* number */ | ||
| 11874 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11875 | MY_UTF8MB4, /* csname */ | ||
| 11876 | MY_UTF8MB4 "_gl_0900_as_cs", /* m_coll_name */ | ||
| 11877 | "", /* comment */ | ||
| 11878 | spanish, /* tailoring */ | ||
| 11879 | nullptr, /* coll_param */ | ||
| 11880 | ctype_utf8, /* ctype */ | ||
| 11881 | nullptr, /* to_lower */ | ||
| 11882 | nullptr, /* to_upper */ | ||
| 11883 | nullptr, /* sort_order */ | ||
| 11884 | &my_uca_v900, /* uca */ | ||
| 11885 | nullptr, /* tab_to_uni */ | ||
| 11886 | nullptr, /* tab_from_uni */ | ||
| 11887 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11888 | nullptr, /* state_map */ | ||
| 11889 | nullptr, /* ident_map */ | ||
| 11890 | 0, /* strxfrm_multiply */ | ||
| 11891 | 1, /* caseup_multiply */ | ||
| 11892 | 1, /* casedn_multiply */ | ||
| 11893 | 1, /* mbminlen */ | ||
| 11894 | 4, /* mbmaxlen */ | ||
| 11895 | 1, /* mbmaxlenlen */ | ||
| 11896 | 9, /* min_sort_char */ | ||
| 11897 | 0x10FFFF, /* max_sort_char */ | ||
| 11898 | ' ', /* pad char */ | ||
| 11899 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11900 | 3, /* levels_for_compare */ | ||
| 11901 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11902 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11903 | NO_PAD}; /* pad attribute */ | ||
| 11904 | |||
| 11905 | CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_ai_ci = { /* utf8mb4 collation "_mn_cyrl_0900_ai_ci": empty tailoring string with the shared ru_coll_param over my_uca_v900, MY_CS_CSSORT not set, 1 compare level, NO_PAD */ | ||
| 11906 | 322, | ||
| 11907 | 0, | ||
| 11908 | 0, /* number */ | ||
| 11909 | MY_CS_UTF8MB4_UCA_FLAGS, /* state */ | ||
| 11910 | MY_UTF8MB4, /* csname */ | ||
| 11911 | MY_UTF8MB4 "_mn_cyrl_0900_ai_ci", /* m_coll_name */ | ||
| 11912 | "", /* comment */ | ||
| 11913 | "", /* tailoring */ | ||
| 11914 | &ru_coll_param, /* coll_param */ | ||
| 11915 | ctype_utf8, /* ctype */ | ||
| 11916 | nullptr, /* to_lower */ | ||
| 11917 | nullptr, /* to_upper */ | ||
| 11918 | nullptr, /* sort_order */ | ||
| 11919 | &my_uca_v900, /* uca */ | ||
| 11920 | nullptr, /* tab_to_uni */ | ||
| 11921 | nullptr, /* tab_from_uni */ | ||
| 11922 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11923 | nullptr, /* state_map */ | ||
| 11924 | nullptr, /* ident_map */ | ||
| 11925 | 0, /* strxfrm_multiply */ | ||
| 11926 | 1, /* caseup_multiply */ | ||
| 11927 | 1, /* casedn_multiply */ | ||
| 11928 | 1, /* mbminlen */ | ||
| 11929 | 4, /* mbmaxlen */ | ||
| 11930 | 1, /* mbmaxlenlen */ | ||
| 11931 | 32, /* min_sort_char */ | ||
| 11932 | 0x10FFFF, /* max_sort_char */ | ||
| 11933 | ' ', /* pad char */ | ||
| 11934 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11935 | 1, /* levels_for_compare */ | ||
| 11936 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11937 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11938 | NO_PAD}; /* pad attribute */ | ||
| 11939 | |||
| 11940 | CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_as_cs = { /* utf8mb4 collation "_mn_cyrl_0900_as_cs": empty tailoring string with the shared ru_coll_param over my_uca_v900, MY_CS_CSSORT set, 3 compare levels, NO_PAD */ | ||
| 11941 | 323, | ||
| 11942 | 0, | ||
| 11943 | 0, /* number */ | ||
| 11944 | MY_CS_UTF8MB4_UCA_FLAGS | MY_CS_CSSORT, /* state */ | ||
| 11945 | MY_UTF8MB4, /* csname */ | ||
| 11946 | MY_UTF8MB4 "_mn_cyrl_0900_as_cs", /* m_coll_name */ | ||
| 11947 | "", /* comment */ | ||
| 11948 | "", /* tailoring */ | ||
| 11949 | &ru_coll_param, /* coll_param */ | ||
| 11950 | ctype_utf8, /* ctype */ | ||
| 11951 | nullptr, /* to_lower */ | ||
| 11952 | nullptr, /* to_upper */ | ||
| 11953 | nullptr, /* sort_order */ | ||
| 11954 | &my_uca_v900, /* uca */ | ||
| 11955 | nullptr, /* tab_to_uni */ | ||
| 11956 | nullptr, /* tab_from_uni */ | ||
| 11957 | &my_unicase_unicode900, /* caseinfo */ | ||
| 11958 | nullptr, /* state_map */ | ||
| 11959 | nullptr, /* ident_map */ | ||
| 11960 | 0, /* strxfrm_multiply */ | ||
| 11961 | 1, /* caseup_multiply */ | ||
| 11962 | 1, /* casedn_multiply */ | ||
| 11963 | 1, /* mbminlen */ | ||
| 11964 | 4, /* mbmaxlen */ | ||
| 11965 | 1, /* mbmaxlenlen */ | ||
| 11966 | 32, /* min_sort_char */ | ||
| 11967 | 0x10FFFF, /* max_sort_char */ | ||
| 11968 | ' ', /* pad char */ | ||
| 11969 | false, /* escape_with_backslash_is_dangerous */ | ||
| 11970 | 3, /* levels_for_compare */ | ||
| 11971 | &my_charset_utf8mb4_handler, /* charset handler */ | ||
| 11972 | &my_collation_uca_900_handler, /* collation handler */ | ||
| 11973 | NO_PAD}; /* pad attribute */ | ||
| 11974 |